From f800fdf4231b6a1ee9c9b9d0915276d52dac8e3b Mon Sep 17 00:00:00 2001
From: cpan <tomgwang@sbcglobal.net>
Date: Sun, 16 Feb 2020 10:59:22 +0000
Subject: [PATCH] Upload files to ''

---
 thinkdsp.py    | 1830 +++++++++++++++++++++++++++++
 thinkplot.py   |  890 ++++++++++++++
 thinkstats2.py | 3041 ++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 5761 insertions(+)
 create mode 100644 thinkdsp.py
 create mode 100644 thinkplot.py
 create mode 100644 thinkstats2.py

diff --git a/thinkdsp.py b/thinkdsp.py
new file mode 100644
index 0000000..066f19e
--- /dev/null
+++ b/thinkdsp.py
@@ -0,0 +1,1830 @@
+"""This file contains code used in "Think DSP",
+by Allen B. Downey, available from greenteapress.com
+
+Copyright 2013 Allen B. Downey
+License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html
+"""
+
+from __future__ import print_function, division
+
+import array
+import copy
+import math
+
+import numpy as np
+import random
+import scipy
+import scipy.stats
+import scipy.fftpack
+import struct
+import subprocess
+import thinkplot
+import warnings
+
+from fractions import gcd
+from wave import open as open_wave
+
+import matplotlib.pyplot as pyplot
+
+try:
+    from IPython.display import Audio
+except:
+    warnings.warn(
+        "Can't import Audio from IPython.display; " "Wave.make_audio() will not work."
+    )
+
+PI2 = math.pi * 2
+
+
+def random_seed(x):
+    """Initialize the random and np.random generators.
+
+    x: int seed
+    """
+    random.seed(x)
+    np.random.seed(x)
+
+
+class UnimplementedMethodException(Exception):
+    """Exception if someone calls a method that should be overridden."""
+
+
+class WavFileWriter:
+    """Writes wav files."""
+
+    def __init__(self, filename="sound.wav", framerate=11025):
+        """Opens the file and sets parameters.
+
+        filename: string
+        framerate: samples per second
+        """
+        self.filename = filename
+        self.framerate = framerate
+        self.nchannels = 1
+        self.sampwidth = 2
+        self.bits = self.sampwidth * 8
+        self.bound = 2 ** (self.bits - 1) - 1
+
+        self.fmt = "h"
+        self.dtype = np.int16
+
+        self.fp = open_wave(self.filename, "w")
+        self.fp.setnchannels(self.nchannels)
+        self.fp.setsampwidth(self.sampwidth)
+        self.fp.setframerate(self.framerate)
+
+    def write(self, wave):
+        """Writes a wave.
+
+        wave: Wave
+        """
+        zs = wave.quantize(self.bound, self.dtype)
+        self.fp.writeframes(zs.tostring())
+
+    def close(self, duration=0):
+        """Closes the file.
+
+        duration: how many seconds of silence to append
+        """
+        if duration:
+            self.write(rest(duration))
+
+        self.fp.close()
+
+
+def read_wave(filename="sound.wav"):
+    """Reads a wave file.
+
+    filename: string
+
+    returns: Wave
+    """
+    fp = open_wave(filename, "r")
+
+    nchannels = fp.getnchannels()
+    nframes = fp.getnframes()
+    sampwidth = fp.getsampwidth()
+    framerate = fp.getframerate()
+
+    z_str = fp.readframes(nframes)
+
+    fp.close()
+
+    dtype_map = {1: np.int8, 2: np.int16, 3: "special", 4: np.int32}
+    if sampwidth not in dtype_map:
+        raise ValueError("sampwidth %d unknown" % sampwidth)
+
+    if sampwidth == 3:
+        xs = np.fromstring(z_str, dtype=np.int8).astype(np.int32)
+        ys = (xs[2::3] * 256 + xs[1::3]) * 256 + xs[0::3]
+    else:
+        ys = np.fromstring(z_str, dtype=dtype_map[sampwidth])
+
+    # if it's in stereo, just pull out the first channel
+    if nchannels == 2:
+        ys = ys[::2]
+
+    # ts = np.arange(len(ys)) / framerate
+    wave = Wave(ys, framerate=framerate)
+    wave.normalize()
+    return wave
+
+
+def play_wave(filename="sound.wav", player="aplay"):
+    """Plays a wave file.
+
+    filename: string
+    player: string name of executable that plays wav files
+    """
+    cmd = "%s %s" % (player, filename)
+    popen = subprocess.Popen(cmd, shell=True)
+    popen.communicate()
+
+
+def find_index(x, xs):
+    """Find the index corresponding to a given value in an array."""
+    n = len(xs)
+    start = xs[0]
+    end = xs[-1]
+    i = round((n - 1) * (x - start) / (end - start))
+    return int(i)
+
+
+class _SpectrumParent:
+    """Contains code common to Spectrum and DCT.
+    """
+
+    def __init__(self, hs, fs, framerate, full=False):
+        """Initializes a spectrum.
+
+        hs: array of amplitudes (real or complex)
+        fs: array of frequencies
+        framerate: frames per second
+        full: boolean to indicate full or real FFT
+        """
+        self.hs = np.asanyarray(hs)
+        self.fs = np.asanyarray(fs)
+        self.framerate = framerate
+        self.full = full
+
+    @property
+    def max_freq(self):
+        """Returns the Nyquist frequency for this spectrum."""
+        return self.framerate / 2
+
+    @property
+    def amps(self):
+        """Returns a sequence of amplitudes (read-only property)."""
+        return np.absolute(self.hs)
+
+    @property
+    def power(self):
+        """Returns a sequence of powers (read-only property)."""
+        return self.amps ** 2
+
+    def copy(self):
+        """Makes a copy.
+
+        Returns: new Spectrum
+        """
+        return copy.deepcopy(self)
+
+    def max_diff(self, other):
+        """Computes the maximum absolute difference between spectra.
+
+        other: Spectrum
+
+        returns: float
+        """
+        assert self.framerate == other.framerate
+        assert len(self) == len(other)
+
+        hs = self.hs - other.hs
+        return np.max(np.abs(hs))
+
+    def ratio(self, denom, thresh=1, val=0):
+        """The ratio of two spectrums.
+
+        denom: Spectrum
+        thresh: values smaller than this are replaced
+        val: with this value
+
+        returns: new Wave
+        """
+        ratio_spectrum = self.copy()
+        ratio_spectrum.hs /= denom.hs
+        ratio_spectrum.hs[denom.amps < thresh] = val
+        return ratio_spectrum
+
+    def invert(self):
+        """Inverts this spectrum/filter.
+
+        returns: new Wave
+        """
+        inverse = self.copy()
+        inverse.hs = 1 / inverse.hs
+        return inverse
+
+    @property
+    def freq_res(self):
+        return self.framerate / 2 / (len(self.fs) - 1)
+
+    def render_full(self, high=None):
+        """Extracts amps and fs from a full spectrum.
+
+        high: cutoff frequency
+
+        returns: fs, amps
+        """
+        hs = np.fft.fftshift(self.hs)
+        amps = np.abs(hs)
+        fs = np.fft.fftshift(self.fs)
+        i = 0 if high is None else find_index(-high, fs)
+        j = None if high is None else find_index(high, fs) + 1
+        return fs[i:j], amps[i:j]
+
+    def plot(self, high=None, **options):
+        """Plots amplitude vs frequency.
+
+        Note: if this is a full spectrum, it ignores low and high
+
+        high: frequency to cut off at
+        """
+        if self.full:
+            fs, amps = self.render_full(high)
+            thinkplot.plot(fs, amps, **options)
+        else:
+            i = None if high is None else find_index(high, self.fs)
+            thinkplot.plot(self.fs[:i], self.amps[:i], **options)
+
+    def plot_power(self, high=None, **options):
+        """Plots power vs frequency.
+
+        high: frequency to cut off at
+        """
+        if self.full:
+            fs, amps = self.render_full(high)
+            thinkplot.plot(fs, amps ** 2, **options)
+        else:
+            i = None if high is None else find_index(high, self.fs)
+            thinkplot.plot(self.fs[:i], self.power[:i], **options)
+
+    def estimate_slope(self):
+        """Runs linear regression on log power vs log frequency.
+
+        returns: slope, inter, r2, p, stderr
+        """
+        x = np.log(self.fs[1:])
+        y = np.log(self.power[1:])
+        t = scipy.stats.linregress(x, y)
+        return t
+
+    def peaks(self):
+        """Finds the highest peaks and their frequencies.
+
+        returns: sorted list of (amplitude, frequency) pairs
+        """
+        t = list(zip(self.amps, self.fs))
+        t.sort(reverse=True)
+        return t
+
+
+class Spectrum(_SpectrumParent):
+    """Represents the spectrum of a signal."""
+
+    def __len__(self):
+        """Length of the spectrum."""
+        return len(self.hs)
+
+    def __add__(self, other):
+        """Adds two spectrums elementwise.
+
+        other: Spectrum
+
+        returns: new Spectrum
+        """
+        if other == 0:
+            return self.copy()
+
+        assert all(self.fs == other.fs)
+        hs = self.hs + other.hs
+        return Spectrum(hs, self.fs, self.framerate, self.full)
+
+    __radd__ = __add__
+
+    def __mul__(self, other):
+        """Multiplies two spectrums elementwise.
+
+        other: Spectrum
+
+        returns: new Spectrum
+        """
+        assert all(self.fs == other.fs)
+        hs = self.hs * other.hs
+        return Spectrum(hs, self.fs, self.framerate, self.full)
+
+    def convolve(self, other):
+        """Convolves two Spectrums.
+
+        other: Spectrum
+
+        returns: Spectrum
+        """
+        assert all(self.fs == other.fs)
+        if self.full:
+            hs1 = np.fft.fftshift(self.hs)
+            hs2 = np.fft.fftshift(other.hs)
+            hs = np.convolve(hs1, hs2, mode="same")
+            hs = np.fft.ifftshift(hs)
+        else:
+            # not sure this branch would mean very much
+            hs = np.convolve(self.hs, other.hs, mode="same")
+
+        return Spectrum(hs, self.fs, self.framerate, self.full)
+
+    @property
+    def real(self):
+        """Returns the real part of the hs (read-only property)."""
+        return np.real(self.hs)
+
+    @property
+    def imag(self):
+        """Returns the imaginary part of the hs (read-only property)."""
+        return np.imag(self.hs)
+
+    @property
+    def angles(self):
+        """Returns a sequence of angles (read-only property)."""
+        return np.angle(self.hs)
+
+    def scale(self, factor):
+        """Multiplies all elements by the given factor.
+
+        factor: what to multiply the magnitude by (could be complex)
+        """
+        self.hs *= factor
+
+    def low_pass(self, cutoff, factor=0):
+        """Attenuate frequencies above the cutoff.
+
+        cutoff: frequency in Hz
+        factor: what to multiply the magnitude by
+        """
+        self.hs[abs(self.fs) > cutoff] *= factor
+
+    def high_pass(self, cutoff, factor=0):
+        """Attenuate frequencies below the cutoff.
+
+        cutoff: frequency in Hz
+        factor: what to multiply the magnitude by
+        """
+        self.hs[abs(self.fs) < cutoff] *= factor
+
+    def band_stop(self, low_cutoff, high_cutoff, factor=0):
+        """Attenuate frequencies between the cutoffs.
+
+        low_cutoff: frequency in Hz
+        high_cutoff: frequency in Hz
+        factor: what to multiply the magnitude by
+        """
+        # TODO: test this function
+        fs = abs(self.fs)
+        indices = (low_cutoff < fs) & (fs < high_cutoff)
+        self.hs[indices] *= factor
+
+    def pink_filter(self, beta=1):
+        """Apply a filter that would make white noise pink.
+
+        beta: exponent of the pink noise
+        """
+        denom = self.fs ** (beta / 2.0)
+        denom[0] = 1
+        self.hs /= denom
+
+    def differentiate(self):
+        """Apply the differentiation filter.
+
+        returns: new Spectrum
+        """
+        new = self.copy()
+        new.hs *= PI2 * 1j * new.fs
+        return new
+
+    def integrate(self):
+        """Apply the integration filter.
+
+        returns: new Spectrum
+        """
+        new = self.copy()
+        new.hs /= PI2 * 1j * new.fs
+        return new
+
+    def make_integrated_spectrum(self):
+        """Makes an integrated spectrum.
+        """
+        cs = np.cumsum(self.power)
+        cs /= cs[-1]
+        return IntegratedSpectrum(cs, self.fs)
+
+    def make_wave(self):
+        """Transforms to the time domain.
+
+        returns: Wave
+        """
+        if self.full:
+            ys = np.fft.ifft(self.hs)
+        else:
+            ys = np.fft.irfft(self.hs)
+
+        # NOTE: whatever the start time was, we lose it when
+        # we transform back; we could fix that by saving start
+        # time in the Spectrum
+        # ts = self.start + np.arange(len(ys)) / self.framerate
+        return Wave(ys, framerate=self.framerate)
+
+
+class IntegratedSpectrum:
+    """Represents the integral of a spectrum."""
+
+    def __init__(self, cs, fs):
+        """Initializes an integrated spectrum:
+
+        cs: sequence of cumulative amplitudes
+        fs: sequence of frequencies
+        """
+        self.cs = np.asanyarray(cs)
+        self.fs = np.asanyarray(fs)
+
+    def plot_power(self, low=0, high=None, expo=False, **options):
+        """Plots the integrated spectrum.
+
+        low: int index to start at
+        high: int index to end at
+        """
+        cs = self.cs[low:high]
+        fs = self.fs[low:high]
+
+        if expo:
+            cs = np.exp(cs)
+
+        thinkplot.plot(fs, cs, **options)
+
+    def estimate_slope(self, low=1, high=-12000):
+        """Runs linear regression on log cumulative power vs log frequency.
+
+        returns: slope, inter, r2, p, stderr
+        """
+        # print self.fs[low:high]
+        # print self.cs[low:high]
+        x = np.log(self.fs[low:high])
+        y = np.log(self.cs[low:high])
+        t = scipy.stats.linregress(x, y)
+        return t
+
+
+class Dct(_SpectrumParent):
+    """Represents the spectrum of a signal using discrete cosine transform."""
+
+    @property
+    def amps(self):
+        """Returns a sequence of amplitudes (read-only property).
+
+        Note: for DCTs, amps are positive or negative real.
+        """
+        return self.hs
+
+    def __add__(self, other):
+        """Adds two DCTs elementwise.
+
+        other: DCT
+
+        returns: new DCT
+        """
+        if other == 0:
+            return self
+
+        assert self.framerate == other.framerate
+        hs = self.hs + other.hs
+        return Dct(hs, self.fs, self.framerate)
+
+    __radd__ = __add__
+
+    def make_wave(self):
+        """Transforms to the time domain.
+
+        returns: Wave
+        """
+        N = len(self.hs)
+        ys = scipy.fftpack.idct(self.hs, type=2) / 2 / N
+        # NOTE: whatever the start time was, we lose it when
+        # we transform back
+        # ts = self.start + np.arange(len(ys)) / self.framerate
+        return Wave(ys, framerate=self.framerate)
+
+
+class Spectrogram:
+    """Represents the spectrum of a signal."""
+
+    def __init__(self, spec_map, seg_length):
+        """Initialize the spectrogram.
+
+        spec_map: map from float time to Spectrum
+        seg_length: number of samples in each segment
+        """
+        self.spec_map = spec_map
+        self.seg_length = seg_length
+
+    def any_spectrum(self):
+        """Returns an arbitrary spectrum from the spectrogram."""
+        index = next(iter(self.spec_map))
+        return self.spec_map[index]
+
+    @property
+    def time_res(self):
+        """Time resolution in seconds."""
+        spectrum = self.any_spectrum()
+        return float(self.seg_length) / spectrum.framerate
+
+    @property
+    def freq_res(self):
+        """Frequency resolution in Hz."""
+        return self.any_spectrum().freq_res
+
+    def times(self):
+        """Sorted sequence of times.
+
+        returns: sequence of float times in seconds
+        """
+        ts = sorted(iter(self.spec_map))
+        return ts
+
+    def frequencies(self):
+        """Sequence of frequencies.
+
+        returns: sequence of float freqencies in Hz.
+        """
+        fs = self.any_spectrum().fs
+        return fs
+
+    def plot(self, high=None, **options):
+        """Make a pseudocolor plot.
+
+        high: highest frequency component to plot
+        """
+        fs = self.frequencies()
+        i = None if high is None else find_index(high, fs)
+        fs = fs[:i]
+        ts = self.times()
+
+        # make the array
+        size = len(fs), len(ts)
+        array = np.zeros(size, dtype=np.float)
+
+        # copy amplitude from each spectrum into a column of the array
+        for j, t in enumerate(ts):
+            spectrum = self.spec_map[t]
+            array[:, j] = spectrum.amps[:i]
+
+        thinkplot.pcolor(ts, fs, array, **options)
+
+    def make_wave(self):
+        """Inverts the spectrogram and returns a Wave.
+
+        returns: Wave
+        """
+        res = []
+        for t, spectrum in sorted(self.spec_map.items()):
+            wave = spectrum.make_wave()
+            n = len(wave)
+
+            window = 1 / np.hamming(n)
+            wave.window(window)
+
+            i = wave.find_index(t)
+            start = i - n // 2
+            end = start + n
+            res.append((start, end, wave))
+
+        starts, ends, waves = zip(*res)
+        low = min(starts)
+        high = max(ends)
+
+        ys = np.zeros(high - low, np.float)
+        for start, end, wave in res:
+            ys[start:end] = wave.ys
+
+        # ts = np.arange(len(ys)) / self.framerate
+        return Wave(ys, framerate=wave.framerate)
+
+
+class Wave:
+    """Represents a discrete-time waveform.
+
+    """
+
+    def __init__(self, ys, ts=None, framerate=None):
+        """Initializes the wave.
+
+        ys: wave array
+        ts: array of times
+        framerate: samples per second
+        """
+        self.ys = np.asanyarray(ys)
+        self.framerate = framerate if framerate is not None else 11025
+
+        if ts is None:
+            self.ts = np.arange(len(ys)) / self.framerate
+        else:
+            self.ts = np.asanyarray(ts)
+
+    def copy(self):
+        """Makes a copy.
+
+        Returns: new Wave
+        """
+        return copy.deepcopy(self)
+
+    def __len__(self):
+        return len(self.ys)
+
+    @property
+    def start(self):
+        return self.ts[0]
+
+    @property
+    def end(self):
+        return self.ts[-1]
+
+    @property
+    def duration(self):
+        """Duration (property).
+
+        returns: float duration in seconds
+        """
+        return len(self.ys) / self.framerate
+
+    def __add__(self, other):
+        """Adds two waves elementwise.
+
+        other: Wave
+
+        returns: new Wave
+        """
+        if other == 0:
+            return self
+
+        assert self.framerate == other.framerate
+
+        # make an array of times that covers both waves
+        start = min(self.start, other.start)
+        end = max(self.end, other.end)
+        n = int(round((end - start) * self.framerate)) + 1
+        ys = np.zeros(n)
+        ts = start + np.arange(n) / self.framerate
+
+        def add_ys(wave):
+            i = find_index(wave.start, ts)
+
+            # make sure the arrays line up reasonably well
+            diff = ts[i] - wave.start
+            dt = 1 / wave.framerate
+            if (diff / dt) > 0.1:
+                warnings.warn(
+                    "Can't add these waveforms; their " "time arrays don't line up."
+                )
+
+            j = i + len(wave)
+            ys[i:j] += wave.ys
+
+        add_ys(self)
+        add_ys(other)
+
+        return Wave(ys, ts, self.framerate)
+
+    __radd__ = __add__
+
+    def __or__(self, other):
+        """Concatenates two waves.
+
+        other: Wave
+
+        returns: new Wave
+        """
+        if self.framerate != other.framerate:
+            raise ValueError("Wave.__or__: framerates do not agree")
+
+        ys = np.concatenate((self.ys, other.ys))
+        # ts = np.arange(len(ys)) / self.framerate
+        return Wave(ys, framerate=self.framerate)
+
+    def __mul__(self, other):
+        """Multiplies two waves elementwise.
+
+        Note: this operation ignores the timestamps; the result
+        has the timestamps of self.
+
+        other: Wave
+
+        returns: new Wave
+        """
+        # the spectrums have to have the same framerate and duration
+        assert self.framerate == other.framerate
+        assert len(self) == len(other)
+
+        ys = self.ys * other.ys
+        return Wave(ys, self.ts, self.framerate)
+
+    def max_diff(self, other):
+        """Computes the maximum absolute difference between waves.
+
+        other: Wave
+
+        returns: float
+        """
+        assert self.framerate == other.framerate
+        assert len(self) == len(other)
+
+        ys = self.ys - other.ys
+        return np.max(np.abs(ys))
+
+    def convolve(self, other):
+        """Convolves two waves.
+
+        Note: this operation ignores the timestamps; the result
+        has the timestamps of self.
+
+        other: Wave or NumPy array
+
+        returns: Wave
+        """
+        if isinstance(other, Wave):
+            assert self.framerate == other.framerate
+            window = other.ys
+        else:
+            window = other
+
+        ys = np.convolve(self.ys, window, mode="full")
+        # ts = np.arange(len(ys)) / self.framerate
+        return Wave(ys, framerate=self.framerate)
+
+    def diff(self):
+        """Computes the difference between successive elements.
+
+        returns: new Wave
+        """
+        ys = np.diff(self.ys)
+        ts = self.ts[1:].copy()
+        return Wave(ys, ts, self.framerate)
+
+    def cumsum(self):
+        """Computes the cumulative sum of the elements.
+
+        returns: new Wave
+        """
+        ys = np.cumsum(self.ys)
+        ts = self.ts.copy()
+        return Wave(ys, ts, self.framerate)
+
+    def quantize(self, bound, dtype):
+        """Maps the waveform to quanta.
+
+        bound: maximum amplitude
+        dtype: numpy data type or string
+
+        returns: quantized signal
+        """
+        return quantize(self.ys, bound, dtype)
+
+    def apodize(self, denom=20, duration=0.1):
+        """Tapers the amplitude at the beginning and end of the signal.
+
+        Tapers either the given duration of time or the given
+        fraction of the total duration, whichever is less.
+
+        denom: float fraction of the segment to taper
+        duration: float duration of the taper in seconds
+        """
+        self.ys = apodize(self.ys, self.framerate, denom, duration)
+
+    def hamming(self):
+        """Apply a Hamming window to the wave.
+        """
+        self.ys *= np.hamming(len(self.ys))
+
+    def window(self, window):
+        """Apply a window to the wave.
+
+        window: sequence of multipliers, same length as self.ys
+        """
+        self.ys *= window
+
+    def scale(self, factor):
+        """Multplies the wave by a factor.
+
+        factor: scale factor
+        """
+        self.ys *= factor
+
+    def shift(self, shift):
+        """Shifts the wave left or right in time.
+
+        shift: float time shift
+        """
+        # TODO: track down other uses of this function and check them
+        self.ts += shift
+
+    def roll(self, roll):
+        """Rolls this wave by the given number of locations.
+        """
+        self.ys = np.roll(self.ys, roll)
+
+    def truncate(self, n):
+        """Trims this wave to the given length.
+
+        n: integer index
+        """
+        self.ys = truncate(self.ys, n)
+        self.ts = truncate(self.ts, n)
+
+    def zero_pad(self, n):
+        """Trims this wave to the given length.
+
+        n: integer index
+        """
+        self.ys = zero_pad(self.ys, n)
+        self.ts = self.start + np.arange(n) / self.framerate
+
+    def normalize(self, amp=1.0):
+        """Normalizes the signal to the given amplitude.
+
+        amp: float amplitude
+        """
+        self.ys = normalize(self.ys, amp=amp)
+
+    def unbias(self):
+        """Unbiases the signal.
+        """
+        self.ys = unbias(self.ys)
+
+    def find_index(self, t):
+        """Find the index corresponding to a given time."""
+        n = len(self)
+        start = self.start
+        end = self.end
+        i = round((n - 1) * (t - start) / (end - start))
+        return int(i)
+
+    def segment(self, start=None, duration=None):
+        """Extracts a segment.
+
+        start: float start time in seconds
+        duration: float duration in seconds
+
+        returns: Wave
+        """
+        if start is None:
+            start = self.ts[0]
+            i = 0
+        else:
+            i = self.find_index(start)
+
+        j = None if duration is None else self.find_index(start + duration)
+        return self.slice(i, j)
+
+    def slice(self, i, j):
+        """Makes a slice from a Wave.
+
+        i: first slice index
+        j: second slice index
+        """
+        ys = self.ys[i:j].copy()
+        ts = self.ts[i:j].copy()
+        return Wave(ys, ts, self.framerate)
+
+    def make_spectrum(self, full=False):
+        """Computes the spectrum using FFT.
+
+        returns: Spectrum
+        """
+        n = len(self.ys)
+        d = 1 / self.framerate
+
+        if full:
+            hs = np.fft.fft(self.ys)
+            fs = np.fft.fftfreq(n, d)
+        else:
+            hs = np.fft.rfft(self.ys)
+            fs = np.fft.rfftfreq(n, d)
+
+        return Spectrum(hs, fs, self.framerate, full)
+
+    def make_dct(self):
+        """Computes the DCT of this wave.
+        """
+        N = len(self.ys)
+        hs = scipy.fftpack.dct(self.ys, type=2)
+        fs = (0.5 + np.arange(N)) / 2
+        return Dct(hs, fs, self.framerate)
+
+    def make_spectrogram(self, seg_length, win_flag=True):
+        """Computes the spectrogram of the wave.
+
+        seg_length: number of samples in each segment
+        win_flag: boolean, whether to apply hamming window to each segment
+
+        returns: Spectrogram
+        """
+        if win_flag:
+            window = np.hamming(seg_length)
+        i, j = 0, seg_length
+        step = int(seg_length // 2)
+
+        # map from time to Spectrum
+        spec_map = {}
+
+        while j < len(self.ys):
+            segment = self.slice(i, j)
+            if win_flag:
+                segment.window(window)
+
+            # the nominal time for this segment is the midpoint
+            t = (segment.start + segment.end) / 2
+            spec_map[t] = segment.make_spectrum()
+
+            i += step
+            j += step
+
+        return Spectrogram(spec_map, seg_length)
+
+    def get_xfactor(self, options):
+        try:
+            xfactor = options["xfactor"]
+            options.pop("xfactor")
+        except KeyError:
+            xfactor = 1
+        return xfactor
+
+    def plot(self, **options):
+        """Plots the wave.
+
+        """
+        xfactor = self.get_xfactor(options)
+        thinkplot.plot(self.ts * xfactor, self.ys, **options)
+
+    def plot_vlines(self, **options):
+        """Plots the wave with vertical lines for samples.
+
+        """
+        xfactor = self.get_xfactor(options)
+        thinkplot.vlines(self.ts * xfactor, 0, self.ys, **options)
+
+    def corr(self, other):
+        """Correlation coefficient two waves.
+
+        other: Wave
+
+        returns: float coefficient of correlation
+        """
+        corr = np.corrcoef(self.ys, other.ys)[0, 1]
+        return corr
+
+    def cov_mat(self, other):
+        """Covariance matrix of two waves.
+
+        other: Wave
+
+        returns: 2x2 covariance matrix
+        """
+        return np.cov(self.ys, other.ys)
+
+    def cov(self, other):
+        """Covariance of two unbiased waves.
+
+        other: Wave
+
+        returns: float
+        """
+        total = sum(self.ys * other.ys) / len(self.ys)
+        return total
+
+    def cos_cov(self, k):
+        """Covariance with a cosine signal.
+
+        freq: freq of the cosine signal in Hz
+
+        returns: float covariance
+        """
+        n = len(self.ys)
+        factor = math.pi * k / n
+        ys = [math.cos(factor * (i + 0.5)) for i in range(n)]
+        total = 2 * sum(self.ys * ys)
+        return total
+
+    def cos_transform(self):
+        """Discrete cosine transform.
+
+        returns: list of frequency, cov pairs
+        """
+        n = len(self.ys)
+        res = []
+        for k in range(n):
+            cov = self.cos_cov(k)
+            res.append((k, cov))
+
+        return res
+
+    def write(self, filename="sound.wav"):
+        """Write a wave file.
+
+        filename: string
+        """
+        print("Writing", filename)
+        wfile = WavFileWriter(filename, self.framerate)
+        wfile.write(self)
+        wfile.close()
+
+    def play(self, filename="sound.wav"):
+        """Plays a wave file.
+
+        filename: string
+        """
+        self.write(filename)
+        play_wave(filename)
+
+    def make_audio(self):
+        """Makes an IPython Audio object.
+        """
+        audio = Audio(data=self.ys.real, rate=self.framerate)
+        return audio
+
+
+def unbias(ys):
+    """Shifts a wave array so it has mean 0.
+
+    ys: wave array
+
+    returns: wave array
+    """
+    return ys - ys.mean()
+
+
+def normalize(ys, amp=1.0):
+    """Normalizes a wave array so the maximum amplitude is +amp or -amp.
+
+    ys: wave array
+    amp: max amplitude (pos or neg) in result
+
+    returns: wave array
+    """
+    high, low = abs(max(ys)), abs(min(ys))
+    return amp * ys / max(high, low)
+
+
+def shift_right(ys, shift):
+    """Shifts a wave array to the right and zero pads.
+
+    ys: wave array
+    shift: integer shift
+
+    returns: wave array
+    """
+    res = np.zeros(len(ys) + shift)
+    res[shift:] = ys
+    return res
+
+
+def shift_left(ys, shift):
+    """Shifts a wave array to the left.
+
+    ys: wave array
+    shift: integer shift
+
+    returns: wave array
+    """
+    return ys[shift:]
+
+
+def truncate(ys, n):
+    """Trims a wave array to the given length.
+
+    ys: wave array
+    n: integer length
+
+    returns: wave array
+    """
+    return ys[:n]
+
+
+def quantize(ys, bound, dtype):
+    """Maps the waveform to quanta.
+
+    ys: wave array
+    bound: maximum amplitude
+    dtype: numpy data type of the result
+
+    returns: quantized signal
+    """
+    if max(ys) > 1 or min(ys) < -1:
+        warnings.warn("Warning: normalizing before quantizing.")
+        ys = normalize(ys)
+
+    zs = (ys * bound).astype(dtype)
+    return zs
+
+
+def apodize(ys, framerate, denom=20, duration=0.1):
+    """Tapers the amplitude at the beginning and end of the signal.
+
+    Tapers either the given duration of time or the given
+    fraction of the total duration, whichever is less.
+
+    ys: wave array
+    framerate: int frames per second
+    denom: float fraction of the segment to taper
+    duration: float duration of the taper in seconds
+
+    returns: wave array
+    """
+    # a fixed fraction of the segment
+    n = len(ys)
+    k1 = n // denom
+
+    # a fixed duration of time
+    k2 = int(duration * framerate)
+
+    k = min(k1, k2)
+
+    w1 = np.linspace(0, 1, k)
+    w2 = np.ones(n - 2 * k)
+    w3 = np.linspace(1, 0, k)
+
+    window = np.concatenate((w1, w2, w3))
+    return ys * window
+
+
+class Signal:
+    """Represents a time-varying signal."""
+
+    def __add__(self, other):
+        """Adds two signals.
+
+        other: Signal
+
+        returns: Signal
+        """
+        if other == 0:
+            return self
+        return SumSignal(self, other)
+
+    __radd__ = __add__
+
+    @property
+    def period(self):
+        """Period of the signal in seconds (property).
+
+        Since this is used primarily for purposes of plotting,
+        the default behavior is to return a value, 0.1 seconds,
+        that is reasonable for many signals.
+
+        returns: float seconds
+        """
+        return 0.1
+
+    def plot(self, framerate=11025):
+        """Plots the signal.
+
+        The default behavior is to plot three periods.
+
+        framerate: samples per second
+        """
+        duration = self.period * 3
+        wave = self.make_wave(duration, start=0, framerate=framerate)
+        wave.plot()
+
+    def make_wave(self, duration=1, start=0, framerate=11025):
+        """Makes a Wave object.
+
+        duration: float seconds
+        start: float seconds
+        framerate: int frames per second
+
+        returns: Wave
+        """
+        n = round(duration * framerate)
+        ts = start + np.arange(n) / framerate
+        ys = self.evaluate(ts)
+        return Wave(ys, ts, framerate=framerate)
+
+
+def infer_framerate(ts):
+    """Given ts, find the framerate.
+
+    Assumes that the ts are equally spaced.
+
+    ts: sequence of times in seconds
+
+    returns: frames per second
+    """
+    # TODO: confirm that this is never used and remove it
+    dt = ts[1] - ts[0]
+    framerate = 1.0 / dt
+    return framerate
+
+
+class SumSignal(Signal):
+    """Represents the sum of signals."""
+
+    def __init__(self, *args):
+        """Initializes the sum.
+
+        args: tuple of signals
+        """
+        self.signals = args
+
+    @property
+    def period(self):
+        """Period of the signal in seconds.
+
+        Note: this is not correct; it's mostly a placekeeper.
+
+        But it is correct for a harmonic sequence where all
+        component frequencies are multiples of the fundamental.
+
+        returns: float seconds
+        """
+        return max(sig.period for sig in self.signals)
+
+    def evaluate(self, ts):
+        """Evaluates the signal at the given times.
+
+        ts: float array of times
+
+        returns: float wave array
+        """
+        ts = np.asarray(ts)
+        return sum(sig.evaluate(ts) for sig in self.signals)
+
+
+class Sinusoid(Signal):
+    """Represents a sinusoidal signal."""
+
+    def __init__(self, freq=440, amp=1.0, offset=0, func=np.sin):
+        """Initializes a sinusoidal signal.
+
+        freq: float frequency in Hz
+        amp: float amplitude, 1.0 is nominal max
+        offset: float phase offset in radians
+        func: function that maps phase to amplitude
+        """
+        self.freq = freq
+        self.amp = amp
+        self.offset = offset
+        self.func = func
+
+    @property
+    def period(self):
+        """Period of the signal in seconds.
+
+        returns: float seconds
+        """
+        return 1.0 / self.freq
+
+    def evaluate(self, ts):
+        """Evaluates the signal at the given times.
+
+        ts: float array of times
+
+        returns: float wave array
+        """
+        ts = np.asarray(ts)
+        phases = PI2 * self.freq * ts + self.offset
+        ys = self.amp * self.func(phases)
+        return ys
+
+
+def CosSignal(freq=440, amp=1.0, offset=0):
+    """Makes a cosine Sinusoid.
+
+    freq: float frequency in Hz
+    amp: float amplitude, 1.0 is nominal max
+    offset: float phase offset in radians
+
+    returns: Sinusoid object
+    """
+    return Sinusoid(freq, amp, offset, func=np.cos)
+
+
+def SinSignal(freq=440, amp=1.0, offset=0):
+    """Makes a sine Sinusoid.
+
+    freq: float frequency in Hz
+    amp: float amplitude, 1.0 is nominal max
+    offset: float phase offset in radians
+
+    returns: Sinusoid object
+    """
+    return Sinusoid(freq, amp, offset, func=np.sin)
+
+
+def Sinc(freq=440, amp=1.0, offset=0):
+    """Makes a Sinc function.
+
+    freq: float frequency in Hz
+    amp: float amplitude, 1.0 is nominal max
+    offset: float phase offset in radians
+
+    returns: Sinusoid object
+    """
+    return Sinusoid(freq, amp, offset, func=np.sinc)
+
+
+class ComplexSinusoid(Sinusoid):
+    """Represents a complex exponential signal."""
+
+    def evaluate(self, ts):
+        """Evaluates the signal at the given times.
+
+        ts: float array of times
+
+        returns: float wave array
+        """
+        ts = np.asarray(ts)
+        phases = PI2 * self.freq * ts + self.offset
+        ys = self.amp * np.exp(1j * phases)
+        return ys
+
+
+class SquareSignal(Sinusoid):
+    """Represents a square signal."""
+
+    def evaluate(self, ts):
+        """Evaluates the signal at the given times.
+
+        ts: float array of times
+
+        returns: float wave array
+        """
+        ts = np.asarray(ts)
+        cycles = self.freq * ts + self.offset / PI2
+        frac, _ = np.modf(cycles)
+        ys = self.amp * np.sign(unbias(frac))
+        return ys
+
+
+class SawtoothSignal(Sinusoid):
+    """Represents a sawtooth signal."""
+
+    def evaluate(self, ts):
+        """Evaluates the signal at the given times.
+
+        ts: float array of times
+
+        returns: float wave array
+        """
+        ts = np.asarray(ts)
+        cycles = self.freq * ts + self.offset / PI2
+        frac, _ = np.modf(cycles)
+        ys = normalize(unbias(frac), self.amp)
+        return ys
+
+
+class ParabolicSignal(Sinusoid):
+    """Represents a parabolic signal."""
+
+    def evaluate(self, ts):
+        """Evaluates the signal at the given times.
+
+        ts: float array of times
+
+        returns: float wave array
+        """
+        ts = np.asarray(ts)
+        cycles = self.freq * ts + self.offset / PI2
+        frac, _ = np.modf(cycles)
+        ys = (frac - 0.5) ** 2
+        ys = normalize(unbias(ys), self.amp)
+        return ys
+
+
+class CubicSignal(ParabolicSignal):
+    """Represents a cubic signal."""
+
+    def evaluate(self, ts):
+        """Evaluates the signal at the given times.
+
+        ts: float array of times
+
+        returns: float wave array
+        """
+        ys = ParabolicSignal.evaluate(self, ts)
+        ys = np.cumsum(ys)
+        ys = normalize(unbias(ys), self.amp)
+        return ys
+
+
+class GlottalSignal(Sinusoid):
+    """Represents a periodic signal that resembles a glottal signal."""
+
+    def evaluate(self, ts):
+        """Evaluates the signal at the given times.
+
+        ts: float array of times
+
+        returns: float wave array
+        """
+        ts = np.asarray(ts)
+        cycles = self.freq * ts + self.offset / PI2
+        frac, _ = np.modf(cycles)
+        ys = frac ** 2 * (1 - frac)
+        ys = normalize(unbias(ys), self.amp)
+        return ys
+
+
+class TriangleSignal(Sinusoid):
+    """Represents a triangle signal."""
+
+    def evaluate(self, ts):
+        """Evaluates the signal at the given times.
+
+        ts: float array of times
+
+        returns: float wave array
+        """
+        ts = np.asarray(ts)
+        cycles = self.freq * ts + self.offset / PI2
+        frac, _ = np.modf(cycles)
+        ys = np.abs(frac - 0.5)
+        ys = normalize(unbias(ys), self.amp)
+        return ys
+
+
+class Chirp(Signal):
+    """Represents a signal with variable frequency."""
+
+    def __init__(self, start=440, end=880, amp=1.0):
+        """Initializes a linear chirp.
+
+        start: float frequency in Hz
+        end: float frequency in Hz
+        amp: float amplitude, 1.0 is nominal max
+        """
+        self.start = start
+        self.end = end
+        self.amp = amp
+
+    @property
+    def period(self):
+        """Period of the signal in seconds.
+
+        returns: float seconds
+        """
+        return ValueError("Non-periodic signal.")
+
+    def evaluate(self, ts):
+        """Evaluates the signal at the given times.
+
+        ts: float array of times
+
+        returns: float wave array
+        """
+        freqs = np.linspace(self.start, self.end, len(ts) - 1)
+        return self._evaluate(ts, freqs)
+
+    def _evaluate(self, ts, freqs):
+        """Helper function that evaluates the signal.
+
+        ts: float array of times
+        freqs: float array of frequencies during each interval
+        """
+        dts = np.diff(ts)
+        dps = PI2 * freqs * dts
+        phases = np.cumsum(dps)
+        phases = np.insert(phases, 0, 0)
+        ys = self.amp * np.cos(phases)
+        return ys
+
+
+class ExpoChirp(Chirp):
+    """Represents a signal with varying frequency."""
+
+    def evaluate(self, ts):
+        """Evaluates the signal at the given times.
+
+        ts: float array of times
+
+        returns: float wave array
+        """
+        start, end = np.log10(self.start), np.log10(self.end)
+        freqs = np.logspace(start, end, len(ts) - 1)
+        return self._evaluate(ts, freqs)
+
+
+class SilentSignal(Signal):
+    """Represents silence."""
+
+    def evaluate(self, ts):
+        """Evaluates the signal at the given times.
+
+        ts: float array of times
+
+        returns: float wave array
+        """
+        return np.zeros(len(ts))
+
+
+class Impulses(Signal):
+    """Represents silence."""
+
+    def __init__(self, locations, amps=1):
+        self.locations = np.asanyarray(locations)
+        self.amps = amps
+
+    def evaluate(self, ts):
+        """Evaluates the signal at the given times.
+
+        ts: float array of times
+
+        returns: float wave array
+        """
+        ys = np.zeros(len(ts))
+        indices = np.searchsorted(ts, self.locations)
+        ys[indices] = self.amps
+        return ys
+
+
+class _Noise(Signal):
+    """Represents a noise signal (abstract parent class)."""
+
+    def __init__(self, amp=1.0):
+        """Initializes a white noise signal.
+
+        amp: float amplitude, 1.0 is nominal max
+        """
+        self.amp = amp
+
+    @property
+    def period(self):
+        """Period of the signal in seconds.
+
+        returns: float seconds
+        """
+        return ValueError("Non-periodic signal.")
+
+
+class UncorrelatedUniformNoise(_Noise):
+    """Represents uncorrelated uniform noise."""
+
+    def evaluate(self, ts):
+        """Evaluates the signal at the given times.
+
+        ts: float array of times
+
+        returns: float wave array
+        """
+        ys = np.random.uniform(-self.amp, self.amp, len(ts))
+        return ys
+
+
+class UncorrelatedGaussianNoise(_Noise):
+    """Represents uncorrelated gaussian noise."""
+
+    def evaluate(self, ts):
+        """Evaluates the signal at the given times.
+
+        ts: float array of times
+
+        returns: float wave array
+        """
+        ys = np.random.normal(0, self.amp, len(ts))
+        return ys
+
+
+class BrownianNoise(_Noise):
+    """Represents Brownian noise, aka red noise."""
+
+    def evaluate(self, ts):
+        """Evaluates the signal at the given times.
+
+        Computes Brownian noise by taking the cumulative sum of
+        a uniform random series.
+
+        ts: float array of times
+
+        returns: float wave array
+        """
+        dys = np.random.uniform(-1, 1, len(ts))
+        # ys = scipy.integrate.cumtrapz(dys, ts)
+        ys = np.cumsum(dys)
+        ys = normalize(unbias(ys), self.amp)
+        return ys
+
+
+class PinkNoise(_Noise):
+    """Represents Brownian noise, aka red noise."""
+
+    def __init__(self, amp=1.0, beta=1.0):
+        """Initializes a pink noise signal.
+
+        amp: float amplitude, 1.0 is nominal max
+        """
+        self.amp = amp
+        self.beta = beta
+
+    def make_wave(self, duration=1, start=0, framerate=11025):
+        """Makes a Wave object.
+
+        duration: float seconds
+        start: float seconds
+        framerate: int frames per second
+
+        returns: Wave
+        """
+        signal = UncorrelatedUniformNoise()
+        wave = signal.make_wave(duration, start, framerate)
+        spectrum = wave.make_spectrum()
+
+        spectrum.pink_filter(beta=self.beta)
+
+        wave2 = spectrum.make_wave()
+        wave2.unbias()
+        wave2.normalize(self.amp)
+        return wave2
+
+
+def rest(duration):
+    """Makes a rest of the given duration.
+
+    duration: float seconds
+
+    returns: Wave
+    """
+    signal = SilentSignal()
+    wave = signal.make_wave(duration)
+    return wave
+
+
+def make_note(midi_num, duration, sig_cons=CosSignal, framerate=11025):
+    """Make a MIDI note with the given duration.
+
+    midi_num: int MIDI note number
+    duration: float seconds
+    sig_cons: Signal constructor function
+    framerate: int frames per second
+
+    returns: Wave
+    """
+    freq = midi_to_freq(midi_num)
+    signal = sig_cons(freq)
+    wave = signal.make_wave(duration, framerate=framerate)
+    wave.apodize()
+    return wave
+
+
+def make_chord(midi_nums, duration, sig_cons=CosSignal, framerate=11025):
+    """Make a chord with the given duration.
+
+    midi_nums: sequence of int MIDI note numbers
+    duration: float seconds
+    sig_cons: Signal constructor function
+    framerate: int frames per second
+
+    returns: Wave
+    """
+    freqs = [midi_to_freq(num) for num in midi_nums]
+    signal = sum(sig_cons(freq) for freq in freqs)
+    wave = signal.make_wave(duration, framerate=framerate)
+    wave.apodize()
+    return wave
+
+
+def midi_to_freq(midi_num):
+    """Converts MIDI note number to frequency.
+
+    midi_num: int MIDI note number
+
+    returns: float frequency in Hz
+    """
+    x = (midi_num - 69) / 12.0
+    freq = 440.0 * 2 ** x
+    return freq
+
+
+def sin_wave(freq, duration=1, offset=0):
+    """Makes a sine wave with the given parameters.
+
+    freq: float cycles per second
+    duration: float seconds
+    offset: float radians
+
+    returns: Wave
+    """
+    signal = SinSignal(freq, offset=offset)
+    wave = signal.make_wave(duration)
+    return wave
+
+
+def cos_wave(freq, duration=1, offset=0):
+    """Makes a cosine wave with the given parameters.
+
+    freq: float cycles per second
+    duration: float seconds
+    offset: float radians
+
+    returns: Wave
+    """
+    signal = CosSignal(freq, offset=offset)
+    wave = signal.make_wave(duration)
+    return wave
+
+
+def mag(a):
+    """Computes the magnitude of a numpy array.
+
+    a: numpy array
+
+    returns: float
+    """
+    return np.sqrt(np.dot(a, a))
+
+
+def zero_pad(array, n):
+    """Extends an array with zeros.
+
+    array: numpy array
+    n: length of result
+
+    returns: new NumPy array
+    """
+    res = np.zeros(n)
+    res[: len(array)] = array
+    return res
+
+
+def main():
+
+    cos_basis = cos_wave(440)
+    sin_basis = sin_wave(440)
+
+    wave = cos_wave(440, offset=math.pi / 2)
+    cos_cov = cos_basis.cov(wave)
+    sin_cov = sin_basis.cov(wave)
+    print(cos_cov, sin_cov, mag((cos_cov, sin_cov)))
+    return
+
+    wfile = WavFileWriter()
+    for sig_cons in [
+        SinSignal,
+        TriangleSignal,
+        SawtoothSignal,
+        GlottalSignal,
+        ParabolicSignal,
+        SquareSignal,
+    ]:
+        print(sig_cons)
+        sig = sig_cons(440)
+        wave = sig.make_wave(1)
+        wave.apodize()
+        wfile.write(wave)
+    wfile.close()
+    return
+
+    signal = GlottalSignal(440)
+    signal.plot()
+    pyplot.show()
+    return
+
+    wfile = WavFileWriter()
+    for m in range(60, 0, -1):
+        wfile.write(make_note(m, 0.25))
+    wfile.close()
+    return
+
+    wave1 = make_note(69, 1)
+    wave2 = make_chord([69, 72, 76], 1)
+    wave = wave1 | wave2
+
+    wfile = WavFileWriter()
+    wfile.write(wave)
+    wfile.close()
+    return
+
+    sig1 = CosSignal(freq=440)
+    sig2 = CosSignal(freq=523.25)
+    sig3 = CosSignal(freq=660)
+    sig4 = CosSignal(freq=880)
+    sig5 = CosSignal(freq=987)
+    sig = sig1 + sig2 + sig3 + sig4
+
+    # wave = Wave(sig, duration=0.02)
+    # wave.plot()
+
+    wave = sig.make_wave(duration=1)
+    # wave.normalize()
+
+    wfile = WavFileWriter(wave)
+    wfile.write()
+    wfile.close()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/thinkplot.py b/thinkplot.py
new file mode 100644
index 0000000..e68f246
--- /dev/null
+++ b/thinkplot.py
@@ -0,0 +1,890 @@
+"""This file contains code for use with "Think Stats",
+by Allen B. Downey, available from greenteapress.com
+
+Copyright 2014 Allen B. Downey
+License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html
+"""
+
+from __future__ import print_function
+
+import math
+import matplotlib
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+
+import warnings
+
+# customize some matplotlib attributes
+#matplotlib.rc('figure', figsize=(4, 3))
+
+#matplotlib.rc('font', size=14.0)
+#matplotlib.rc('axes', labelsize=22.0, titlesize=22.0)
+#matplotlib.rc('legend', fontsize=20.0)
+
+#matplotlib.rc('xtick.major', size=6.0)
+#matplotlib.rc('xtick.minor', size=3.0)
+
+#matplotlib.rc('ytick.major', size=6.0)
+#matplotlib.rc('ytick.minor', size=3.0)
+
+
+class _Brewer(object):
+    """Encapsulates a nice sequence of colors.
+
+    Shades of blue that look good in color and can be distinguished
+    in grayscale (up to a point).
+
+    Borrowed from http://colorbrewer2.org/
+    """
+    color_iter = None
+
+    colors = ['#f7fbff', '#deebf7', '#c6dbef',
+              '#9ecae1', '#6baed6', '#4292c6',
+              '#2171b5','#08519c','#08306b'][::-1]
+
+    # lists that indicate which colors to use depending on how many are used
+    which_colors = [[],
+                    [1],
+                    [1, 3],
+                    [0, 2, 4],
+                    [0, 2, 4, 6],
+                    [0, 2, 3, 5, 6],
+                    [0, 2, 3, 4, 5, 6],
+                    [0, 1, 2, 3, 4, 5, 6],
+                    [0, 1, 2, 3, 4, 5, 6, 7],
+                    [0, 1, 2, 3, 4, 5, 6, 7, 8],
+                    ]
+
+    current_figure = None
+
+    @classmethod
+    def Colors(cls):
+        """Returns the list of colors.
+        """
+        return cls.colors
+
+    @classmethod
+    def ColorGenerator(cls, num):
+        """Returns an iterator of color strings.
+
+        n: how many colors will be used
+        """
+        for i in cls.which_colors[num]:
+            yield cls.colors[i]
+        raise StopIteration('Ran out of colors in _Brewer.')
+
+    @classmethod
+    def InitIter(cls, num):
+        """Initializes the color iterator with the given number of colors."""
+        cls.color_iter = cls.ColorGenerator(num)
+        fig = plt.gcf()
+        cls.current_figure = fig
+
+    @classmethod
+    def ClearIter(cls):
+        """Sets the color iterator to None."""
+        cls.color_iter = None
+        cls.current_figure = None
+
+    @classmethod
+    def GetIter(cls, num):
+        """Gets the color iterator."""
+        fig = plt.gcf()
+        if fig != cls.current_figure:
+            cls.InitIter(num)
+            cls.current_figure = fig
+
+        if cls.color_iter is None:
+            cls.InitIter(num)
+
+        return cls.color_iter
+
+
+def _UnderrideColor(options):
+    """If color is not in the options, chooses a color.
+    """
+    if 'color' in options:
+        return options
+
+    # get the current color iterator; if there is none, init one
+    color_iter = _Brewer.GetIter(5)
+
+    try:
+        options['color'] = next(color_iter)
+    except StopIteration:
+        # if you run out of colors, initialize the color iterator
+        # and try again
+        warnings.warn('Ran out of colors.  Starting over.')
+        _Brewer.ClearIter()
+        _UnderrideColor(options)
+
+    return options
+
+
+def PrePlot(num=None, rows=None, cols=None):
+    """Takes hints about what's coming.
+
+    num: number of lines that will be plotted
+    rows: number of rows of subplots
+    cols: number of columns of subplots
+    """
+    if num:
+        _Brewer.InitIter(num)
+
+    if rows is None and cols is None:
+        return
+
+    if rows is not None and cols is None:
+        cols = 1
+
+    if cols is not None and rows is None:
+        rows = 1
+
+    # resize the image, depending on the number of rows and cols
+    size_map = {(1, 1): (8, 6),
+                (1, 2): (12, 6),
+                (1, 3): (12, 6),
+                (1, 4): (12, 5),
+                (1, 5): (12, 4),
+                (2, 2): (10, 10),
+                (2, 3): (16, 10),
+                (3, 1): (8, 10),
+                (4, 1): (8, 12),
+                }
+
+    if (rows, cols) in size_map:
+        fig = plt.gcf()
+        fig.set_size_inches(*size_map[rows, cols])
+
+    # create the first subplot
+    if rows > 1 or cols > 1:
+        ax = plt.subplot(rows, cols, 1)
+        global SUBPLOT_ROWS, SUBPLOT_COLS
+        SUBPLOT_ROWS = rows
+        SUBPLOT_COLS = cols
+    else:
+        ax = plt.gca()
+
+    return ax
+
+
+def SubPlot(plot_number, rows=None, cols=None, **options):
+    """Configures the number of subplots and changes the current plot.
+
+    rows: int
+    cols: int
+    plot_number: int
+    options: passed to subplot
+    """
+    rows = rows or SUBPLOT_ROWS
+    cols = cols or SUBPLOT_COLS
+    return plt.subplot(rows, cols, plot_number, **options)
+
+
+def _Underride(d, **options):
+    """Add key-value pairs to d only if key is not in d.
+
+    If d is None, create a new dictionary.
+
+    d: dictionary
+    options: keyword args to add to d
+    """
+    if d is None:
+        d = {}
+
+    for key, val in options.items():
+        d.setdefault(key, val)
+
+    return d
+
+
+def Clf():
+    """Clears the figure and any hints that have been set."""
+    global LOC
+    LOC = None
+    _Brewer.ClearIter()
+    plt.clf()
+    fig = plt.gcf()
+    fig.set_size_inches(8, 6)
+
+
+def Figure(**options):
+    """Sets options for the current figure."""
+    _Underride(options, figsize=(6, 8))
+    plt.figure(**options)
+
+
+def Plot(obj, ys=None, style='', **options):
+    """Plots a line.
+
+    Args:
+      obj: sequence of x values, or Series, or anything with Render()
+      ys: sequence of y values
+      style: style string passed along to plt.plot
+      options: keyword args passed to plt.plot
+    """
+    options = _UnderrideColor(options)
+    label = getattr(obj, 'label', '_nolegend_')
+    options = _Underride(options, linewidth=3, alpha=0.7, label=label)
+
+    xs = obj
+    if ys is None:
+        if hasattr(obj, 'Render'):
+            xs, ys = obj.Render()
+        if isinstance(obj, pd.Series):
+            ys = obj.values
+            xs = obj.index
+
+    if ys is None:
+        plt.plot(xs, style, **options)
+    else:
+        plt.plot(xs, ys, style, **options)
+
+
+def Vlines(xs, y1, y2, **options):
+    """Plots a set of vertical lines.
+
+    Args:
+      xs: sequence of x values
+      y1: sequence of y values
+      y2: sequence of y values
+      options: keyword args passed to plt.vlines
+    """
+    options = _UnderrideColor(options)
+    options = _Underride(options, linewidth=1, alpha=0.5)
+    plt.vlines(xs, y1, y2, **options)
+
+
+def Hlines(ys, x1, x2, **options):
+    """Plots a set of horizontal lines.
+
+    Args:
+      ys: sequence of y values
+      x1: sequence of x values
+      x2: sequence of x values
+      options: keyword args passed to plt.vlines
+    """
+    options = _UnderrideColor(options)
+    options = _Underride(options, linewidth=1, alpha=0.5)
+    plt.hlines(ys, x1, x2, **options)
+
+
+def axvline(x, **options):
+    """Plots a vertical line.
+
+    Args:
+      x: x location
+      options: keyword args passed to plt.axvline
+    """
+    options = _UnderrideColor(options)
+    options = _Underride(options, linewidth=1, alpha=0.5)
+    plt.axvline(x, **options)
+
+
+def axhline(y, **options):
+    """Plots a horizontal line.
+
+    Args:
+      y: y location
+      options: keyword args passed to plt.axhline
+    """
+    options = _UnderrideColor(options)
+    options = _Underride(options, linewidth=1, alpha=0.5)
+    plt.axhline(y, **options)
+
+
+def tight_layout(**options):
+    """Adjust subplots to minimize padding and margins.
+    """
+    options = _Underride(options,
+                         wspace=0.1, hspace=0.1,
+                         left=0, right=1,
+                         bottom=0, top=1)
+    plt.tight_layout()
+    plt.subplots_adjust(**options)
+
+
+def FillBetween(xs, y1, y2=None, where=None, **options):
+    """Fills the space between two lines.
+
+    Args:
+      xs: sequence of x values
+      y1: sequence of y values
+      y2: sequence of y values
+      where: sequence of boolean
+      options: keyword args passed to plt.fill_between
+    """
+    options = _UnderrideColor(options)
+    options = _Underride(options, linewidth=0, alpha=0.5)
+    plt.fill_between(xs, y1, y2, where, **options)
+
+
+def Bar(xs, ys, **options):
+    """Plots a line.
+
+    Args:
+      xs: sequence of x values
+      ys: sequence of y values
+      options: keyword args passed to plt.bar
+    """
+    options = _UnderrideColor(options)
+    options = _Underride(options, linewidth=0, alpha=0.6)
+    plt.bar(xs, ys, **options)
+
+
+def Scatter(xs, ys=None, **options):
+    """Makes a scatter plot.
+
+    xs: x values
+    ys: y values
+    options: options passed to plt.scatter
+    """
+    options = _Underride(options, color='blue', alpha=0.2,
+                         s=30, edgecolors='none')
+
+    if ys is None and isinstance(xs, pd.Series):
+        ys = xs.values
+        xs = xs.index
+
+    plt.scatter(xs, ys, **options)
+
+
+def HexBin(xs, ys, **options):
+    """Makes a scatter plot.
+
+    xs: x values
+    ys: y values
+    options: options passed to plt.scatter
+    """
+    options = _Underride(options, cmap=matplotlib.cm.Blues)
+    plt.hexbin(xs, ys, **options)
+
+
+def Pdf(pdf, **options):
+    """Plots a Pdf, Pmf, or Hist as a line.
+
+    Args:
+      pdf: Pdf, Pmf, or Hist object
+      options: keyword args passed to plt.plot
+    """
+    low, high = options.pop('low', None), options.pop('high', None)
+    n = options.pop('n', 101)
+    xs, ps = pdf.Render(low=low, high=high, n=n)
+    options = _Underride(options, label=pdf.label)
+    Plot(xs, ps, **options)
+
+
+def Pdfs(pdfs, **options):
+    """Plots a sequence of PDFs.
+
+    Options are passed along for all PDFs.  If you want different
+    options for each pdf, make multiple calls to Pdf.
+
+    Args:
+      pdfs: sequence of PDF objects
+      options: keyword args passed to plt.plot
+    """
+    for pdf in pdfs:
+        Pdf(pdf, **options)
+
+
+def Hist(hist, **options):
+    """Plots a Pmf or Hist with a bar plot.
+
+    The default width of the bars is based on the minimum difference
+    between values in the Hist.  If that's too small, you can override
+    it by providing a width keyword argument, in the same units
+    as the values.
+
+    Args:
+      hist: Hist or Pmf object
+      options: keyword args passed to plt.bar
+    """
+    # find the minimum distance between adjacent values
+    xs, ys = hist.Render()
+
+    # see if the values support arithmetic
+    try:
+        xs[0] - xs[0]
+    except TypeError:
+        # if not, replace values with numbers
+        labels = [str(x) for x in xs]
+        xs = np.arange(len(xs))
+        plt.xticks(xs+0.5, labels)
+
+    if 'width' not in options:
+        try:
+            options['width'] = 0.9 * np.diff(xs).min()
+        except TypeError:
+            warnings.warn("Hist: Can't compute bar width automatically."
+                            "Check for non-numeric types in Hist."
+                            "Or try providing width option."
+                            )
+
+    options = _Underride(options, label=hist.label)
+    options = _Underride(options, align='center')
+    if options['align'] == 'left':
+        options['align'] = 'edge'
+    elif options['align'] == 'right':
+        options['align'] = 'edge'
+        options['width'] *= -1
+
+    Bar(xs, ys, **options)
+
+
+def Hists(hists, **options):
+    """Plots two histograms as interleaved bar plots.
+
+    Options are passed along for all PMFs.  If you want different
+    options for each pmf, make multiple calls to Pmf.
+
+    Args:
+      hists: list of two Hist or Pmf objects
+      options: keyword args passed to plt.plot
+    """
+    for hist in hists:
+        Hist(hist, **options)
+
+
+def Pmf(pmf, **options):
+    """Plots a Pmf or Hist as a line.
+
+    Args:
+      pmf: Hist or Pmf object
+      options: keyword args passed to plt.plot
+    """
+    xs, ys = pmf.Render()
+    low, high = min(xs), max(xs)
+
+    width = options.pop('width', None)
+    if width is None:
+        try:
+            width = np.diff(xs).min()
+        except TypeError:
+            warnings.warn("Pmf: Can't compute bar width automatically."
+                          "Check for non-numeric types in Pmf."
+                          "Or try providing width option.")
+    points = []
+
+    lastx = np.nan
+    lasty = 0
+    for x, y in zip(xs, ys):
+        if (x - lastx) > 1e-5:
+            points.append((lastx, 0))
+            points.append((x, 0))
+
+        points.append((x, lasty))
+        points.append((x, y))
+        points.append((x+width, y))
+
+        lastx = x + width
+        lasty = y
+    points.append((lastx, 0))
+    pxs, pys = zip(*points)
+
+    align = options.pop('align', 'center')
+    if align == 'center':
+        pxs = np.array(pxs) - width/2.0
+    if align == 'right':
+        pxs = np.array(pxs) - width
+
+    options = _Underride(options, label=pmf.label)
+    Plot(pxs, pys, **options)
+
+
+def Pmfs(pmfs, **options):
+    """Plots a sequence of PMFs.
+
+    Options are passed along for all PMFs.  If you want different
+    options for each pmf, make multiple calls to Pmf.
+
+    Args:
+      pmfs: sequence of PMF objects
+      options: keyword args passed to plt.plot
+    """
+    for pmf in pmfs:
+        Pmf(pmf, **options)
+
+
+def Diff(t):
+    """Compute the differences between adjacent elements in a sequence.
+
+    Args:
+        t: sequence of number
+
+    Returns:
+        sequence of differences (length one less than t)
+    """
+    diffs = [t[i+1] - t[i] for i in range(len(t)-1)]
+    return diffs
+
+
+def Cdf(cdf, complement=False, transform=None, **options):
+    """Plots a CDF as a line.
+
+    Args:
+      cdf: Cdf object
+      complement: boolean, whether to plot the complementary CDF
+      transform: string, one of 'exponential', 'pareto', 'weibull', 'gumbel'
+      options: keyword args passed to plt.plot
+
+    Returns:
+      dictionary with the scale options that should be passed to
+      Config, Show or Save.
+    """
+    xs, ps = cdf.Render()
+    xs = np.asarray(xs)
+    ps = np.asarray(ps)
+
+    scale = dict(xscale='linear', yscale='linear')
+
+    for s in ['xscale', 'yscale']:
+        if s in options:
+            scale[s] = options.pop(s)
+
+    if transform == 'exponential':
+        complement = True
+        scale['yscale'] = 'log'
+
+    if transform == 'pareto':
+        complement = True
+        scale['yscale'] = 'log'
+        scale['xscale'] = 'log'
+
+    if complement:
+        ps = [1.0-p for p in ps]
+
+    if transform == 'weibull':
+        xs = np.delete(xs, -1)
+        ps = np.delete(ps, -1)
+        ps = [-math.log(1.0-p) for p in ps]
+        scale['xscale'] = 'log'
+        scale['yscale'] = 'log'
+
+    if transform == 'gumbel':
+        xs = np.delete(xs, 0)
+        ps = np.delete(ps, 0)
+        ps = [-math.log(p) for p in ps]
+        scale['yscale'] = 'log'
+
+    options = _Underride(options, label=cdf.label)
+    Plot(xs, ps, **options)
+    return scale
+
+
+def Cdfs(cdfs, complement=False, transform=None, **options):
+    """Plots a sequence of CDFs.
+
+    cdfs: sequence of CDF objects
+    complement: boolean, whether to plot the complementary CDF
+    transform: string, one of 'exponential', 'pareto', 'weibull', 'gumbel'
+    options: keyword args passed to plt.plot
+    """
+    for cdf in cdfs:
+        Cdf(cdf, complement, transform, **options)
+
+
+def Contour(obj, pcolor=False, contour=True, imshow=False, **options):
+    """Makes a contour plot.
+
+    d: map from (x, y) to z, or object that provides GetDict
+    pcolor: boolean, whether to make a pseudocolor plot
+    contour: boolean, whether to make a contour plot
+    imshow: boolean, whether to use plt.imshow
+    options: keyword args passed to plt.pcolor and/or plt.contour
+    """
+    try:
+        d = obj.GetDict()
+    except AttributeError:
+        d = obj
+
+    _Underride(options, linewidth=3, cmap=matplotlib.cm.Blues)
+
+    xs, ys = zip(*d.keys())
+    xs = sorted(set(xs))
+    ys = sorted(set(ys))
+
+    X, Y = np.meshgrid(xs, ys)
+    func = lambda x, y: d.get((x, y), 0)
+    func = np.vectorize(func)
+    Z = func(X, Y)
+
+    x_formatter = matplotlib.ticker.ScalarFormatter(useOffset=False)
+    axes = plt.gca()
+    axes.xaxis.set_major_formatter(x_formatter)
+
+    if pcolor:
+        plt.pcolormesh(X, Y, Z, **options)
+    if contour:
+        cs = plt.contour(X, Y, Z, **options)
+        plt.clabel(cs, inline=1, fontsize=10)
+    if imshow:
+        extent = xs[0], xs[-1], ys[0], ys[-1]
+        plt.imshow(Z, extent=extent, **options)
+
+
+def Pcolor(xs, ys, zs, pcolor=True, contour=False, **options):
+    """Makes a pseudocolor plot.
+
+    xs:
+    ys:
+    zs:
+    pcolor: boolean, whether to make a pseudocolor plot
+    contour: boolean, whether to make a contour plot
+    options: keyword args passed to plt.pcolor and/or plt.contour
+    """
+    _Underride(options, linewidth=3, cmap=matplotlib.cm.Blues)
+
+    X, Y = np.meshgrid(xs, ys)
+    Z = zs
+
+    x_formatter = matplotlib.ticker.ScalarFormatter(useOffset=False)
+    axes = plt.gca()
+    axes.xaxis.set_major_formatter(x_formatter)
+
+    if pcolor:
+        plt.pcolormesh(X, Y, Z, **options)
+
+    if contour:
+        cs = plt.contour(X, Y, Z, **options)
+        plt.clabel(cs, inline=1, fontsize=10)
+
+
+def Text(x, y, s, **options):
+    """Puts text in a figure.
+
+    x: number
+    y: number
+    s: string
+    options: keyword args passed to plt.text
+    """
+    options = _Underride(options,
+                         fontsize=16,
+                         verticalalignment='top',
+                         horizontalalignment='left')
+    plt.text(x, y, s, **options)
+
+
+LEGEND = True
+LOC = None
+
+def Config(**options):
+    """Configures the plot.
+
+    Pulls options out of the option dictionary and passes them to
+    the corresponding plt functions.
+    """
+    names = ['title', 'xlabel', 'ylabel', 'xscale', 'yscale',
+             'xticks', 'yticks', 'axis', 'xlim', 'ylim']
+
+    for name in names:
+        if name in options:
+            getattr(plt, name)(options[name])
+
+    global LEGEND
+    LEGEND = options.get('legend', LEGEND)
+
+    # see if there are any elements with labels;
+    # if not, don't draw a legend
+    ax = plt.gca()
+    handles, labels = ax.get_legend_handles_labels()
+
+    if LEGEND and len(labels) > 0:
+        global LOC
+        LOC = options.get('loc', LOC)
+        frameon = options.get('frameon', True)
+
+        try:
+            plt.legend(loc=LOC, frameon=frameon)
+        except UserWarning:
+            pass
+
+    # x and y ticklabels can be made invisible
+    val = options.get('xticklabels', None)
+    if val is not None:
+        if val == 'invisible':
+            ax = plt.gca()
+            labels = ax.get_xticklabels()
+            plt.setp(labels, visible=False)
+
+    val = options.get('yticklabels', None)
+    if val is not None:
+        if val == 'invisible':
+            ax = plt.gca()
+            labels = ax.get_yticklabels()
+            plt.setp(labels, visible=False)
+
+def set_font_size(title_size=16, label_size=16, ticklabel_size=14, legend_size=14):
+    """Set font sizes for the title, labels, ticklabels, and legend.
+    """
+    def set_text_size(texts, size):
+        for text in texts:
+            text.set_size(size)
+
+    ax = plt.gca()
+
+    # TODO: Make this function more robust if any of these elements
+    # is missing.
+
+    # title
+    ax.title.set_size(title_size)
+
+    # x axis
+    ax.xaxis.label.set_size(label_size)
+    set_text_size(ax.xaxis.get_ticklabels(), ticklabel_size)
+
+    # y axis
+    ax.yaxis.label.set_size(label_size)
+    set_text_size(ax.yaxis.get_ticklabels(), ticklabel_size)
+
+    # legend
+    legend = ax.get_legend()
+    if legend is not None:
+        set_text_size(legend.texts, legend_size)
+
+
+def bigger_text():
+    sizes = dict(title_size=16, label_size=16, ticklabel_size=14, legend_size=14)
+    set_font_size(**sizes)
+
+
+def Show(**options):
+    """Shows the plot.
+
+    For options, see Config.
+
+    options: keyword args used to invoke various plt functions
+    """
+    clf = options.pop('clf', True)
+    Config(**options)
+    plt.show()
+    if clf:
+        Clf()
+
+
+def Plotly(**options):
+    """Shows the plot.
+
+    For options, see Config.
+
+    options: keyword args used to invoke various plt functions
+    """
+    clf = options.pop('clf', True)
+    Config(**options)
+    import plotly.plotly as plotly
+    url = plotly.plot_mpl(plt.gcf())
+    if clf:
+        Clf()
+    return url
+
+
+def Save(root=None, formats=None, **options):
+    """Saves the plot in the given formats and clears the figure.
+
+    For options, see Config.
+
+    Note: With a capital S, this is the original save, maintained for
+    compatibility.  New code should use save(), which works better
+    with my newer code, especially in Jupyter notebooks.
+
+    Args:
+      root: string filename root
+      formats: list of string formats
+      options: keyword args used to invoke various plt functions
+    """
+    clf = options.pop('clf', True)
+
+    save_options = {}
+    for option in ['bbox_inches', 'pad_inches']:
+        if option in options:
+            save_options[option] = options.pop(option)
+
+    # TODO: falling Config inside Save was probably a mistake, but removing
+    # it will require some work
+    Config(**options)
+
+    if formats is None:
+        formats = ['pdf', 'png']
+
+    try:
+        formats.remove('plotly')
+        Plotly(clf=False)
+    except ValueError:
+        pass
+
+    if root:
+        for fmt in formats:
+            SaveFormat(root, fmt, **save_options)
+    if clf:
+        Clf()
+
+
+def save(root, formats=None, **options):
+    """Saves the plot in the given formats and clears the figure.
+
+    For options, see plt.savefig.
+
+    Args:
+      root: string filename root
+      formats: list of string formats
+      options: keyword args passed to plt.savefig
+    """
+    if formats is None:
+        formats = ['pdf', 'png']
+
+    try:
+        formats.remove('plotly')
+        Plotly(clf=False)
+    except ValueError:
+        pass
+
+    for fmt in formats:
+        SaveFormat(root, fmt, **options)
+
+
+def SaveFormat(root, fmt='eps', **options):
+    """Writes the current figure to a file in the given format.
+
+    Args:
+      root: string filename root
+      fmt: string format
+    """
+    _Underride(options, dpi=300)
+    filename = '%s.%s' % (root, fmt)
+    print('Writing', filename)
+    plt.savefig(filename, format=fmt, **options)
+
+
+# provide aliases for calling functions with lower-case names
+preplot = PrePlot
+subplot = SubPlot
+clf = Clf
+figure = Figure
+plot = Plot
+vlines = Vlines
+hlines = Hlines
+fill_between = FillBetween
+text = Text
+scatter = Scatter
+pmf = Pmf
+pmfs = Pmfs
+hist = Hist
+hists = Hists
+diff = Diff
+cdf = Cdf
+cdfs = Cdfs
+contour = Contour
+pcolor = Pcolor
+config = Config
+show = Show
+
+
+def main():
+    color_iter = _Brewer.ColorGenerator(7)
+    for color in color_iter:
+        print(color)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/thinkstats2.py b/thinkstats2.py
new file mode 100644
index 0000000..42c2bbc
--- /dev/null
+++ b/thinkstats2.py
@@ -0,0 +1,3041 @@
+"""This file contains code for use with "Think Stats" and
+"Think Bayes", both by Allen B. Downey, available from greenteapress.com
+
+Copyright 2014 Allen B. Downey
+License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html
+"""
+
+from __future__ import print_function, division
+
+"""This file contains class definitions for:
+
+Hist: represents a histogram (map from values to integer frequencies).
+
+Pmf: represents a probability mass function (map from values to probs).
+
+_DictWrapper: private parent class for Hist and Pmf.
+
+Cdf: represents a discrete cumulative distribution function
+
+Pdf: represents a continuous probability density function
+
+"""
+
+import bisect
+import copy
+import logging
+import math
+import random
+import re
+
+from collections import Counter
+from operator import itemgetter
+
+import thinkplot
+
+import numpy as np
+import pandas
+
+import scipy
+from scipy import stats
+from scipy import special
+from scipy import ndimage
+
+from scipy.special import gamma
+
+from io import open
+
+ROOT2 = math.sqrt(2)
+
+def RandomSeed(x):
+    """Initialize the random and np.random generators.
+
+    x: int seed
+    """
+    random.seed(x)
+    np.random.seed(x)
+    
+
+def Odds(p):
+    """Computes odds for a given probability.
+
+    Example: p=0.75 means 75 for and 25 against, or 3:1 odds in favor.
+
+    Note: when p=1, the formula for odds divides by zero, which is
+    normally undefined.  But I think it is reasonable to define Odds(1)
+    to be infinity, so that's what this function does.
+
+    p: float 0-1
+
+    Returns: float odds
+    """
+    if p == 1:
+        return float('inf')
+    return p / (1 - p)
+
+
+def Probability(o):
+    """Computes the probability corresponding to given odds.
+
+    Example: o=2 means 2:1 odds in favor, or 2/3 probability
+
+    o: float odds, strictly positive
+
+    Returns: float probability
+    """
+    return o / (o + 1)
+
+
+def Probability2(yes, no):
+    """Computes the probability corresponding to given odds.
+
+    Example: yes=2, no=1 means 2:1 odds in favor, or 2/3 probability.
+    
+    yes, no: int or float odds in favor
+    """
+    return yes / (yes + no)
+
+
+class Interpolator(object):
+    """Represents a mapping between sorted sequences; performs linear interp.
+
+    Attributes:
+        xs: sorted list
+        ys: sorted list
+    """
+
+    def __init__(self, xs, ys):
+        self.xs = xs
+        self.ys = ys
+
+    def Lookup(self, x):
+        """Looks up x and returns the corresponding value of y."""
+        return self._Bisect(x, self.xs, self.ys)
+
+    def Reverse(self, y):
+        """Looks up y and returns the corresponding value of x."""
+        return self._Bisect(y, self.ys, self.xs)
+
+    def _Bisect(self, x, xs, ys):
+        """Helper function."""
+        if x <= xs[0]:
+            return ys[0]
+        if x >= xs[-1]:
+            return ys[-1]
+        i = bisect.bisect(xs, x)
+        frac = 1.0 * (x - xs[i - 1]) / (xs[i] - xs[i - 1])
+        y = ys[i - 1] + frac * 1.0 * (ys[i] - ys[i - 1])
+        return y
+
+
+# When we plot Hist, Pmf and Cdf objects, they don't appear in
+# the legend unless we override the default label.
+DEFAULT_LABEL = '_nolegend_' 
+
+
+class _DictWrapper(object):
+    """An object that contains a dictionary."""
+
+    def __init__(self, obj=None, label=None):
+        """Initializes the distribution.
+
+        obj: Hist, Pmf, Cdf, Pdf, dict, pandas Series, list of pairs
+        label: string label
+        """
+        self.label = label if label is not None else DEFAULT_LABEL
+        self.d = {}
+
+        # flag whether the distribution is under a log transform
+        self.log = False
+
+        if obj is None:
+            return
+
+        if isinstance(obj, (_DictWrapper, Cdf, Pdf)):
+            self.label = label if label is not None else obj.label
+
+        if isinstance(obj, dict):
+            self.d.update(obj.items())
+        elif isinstance(obj, (_DictWrapper, Cdf, Pdf)):
+            self.d.update(obj.Items())
+        elif isinstance(obj, pandas.Series):
+            self.d.update(obj.value_counts().iteritems())
+        else:
+            # finally, treat it like a list
+            self.d.update(Counter(obj))
+
+        if len(self) > 0 and isinstance(self, Pmf):
+            self.Normalize()
+
+    def __hash__(self):
+        return id(self)
+
+    def __str__(self):
+        cls = self.__class__.__name__
+        if self.label == DEFAULT_LABEL:
+            return '%s(%s)' % (cls, str(self.d))
+        else:
+            return self.label
+
+    def __repr__(self):
+        cls = self.__class__.__name__
+        if self.label == DEFAULT_LABEL:
+            return '%s(%s)' % (cls, repr(self.d))
+        else:
+            return '%s(%s, %s)' % (cls, repr(self.d), repr(self.label))
+
+    def __eq__(self, other):
+        try:
+            return self.d == other.d
+        except AttributeError:
+            return False
+
+    def __len__(self):
+        return len(self.d)
+
+    def __iter__(self):
+        return iter(self.d)
+
+    def iterkeys(self):
+        """Returns an iterator over keys."""
+        return iter(self.d)
+
+    def __contains__(self, value):
+        return value in self.d
+
+    def __getitem__(self, value):
+        return self.d.get(value, 0)
+
+    def __setitem__(self, value, prob):
+        self.d[value] = prob
+
+    def __delitem__(self, value):
+        del self.d[value]
+
+    def Copy(self, label=None):
+        """Returns a copy.
+
+        Make a shallow copy of d.  If you want a deep copy of d,
+        use copy.deepcopy on the whole object.
+
+        label: string label for the new Hist
+
+        returns: new _DictWrapper with the same type
+        """
+        new = copy.copy(self)
+        new.d = copy.copy(self.d)
+        new.label = label if label is not None else self.label
+        return new
+
+    def Scale(self, factor):
+        """Multiplies the values by a factor.
+
+        factor: what to multiply by
+
+        Returns: new object
+        """
+        new = self.Copy()
+        new.d.clear()
+
+        for val, prob in self.Items():
+            new.Set(val * factor, prob)
+        return new
+
+    def Log(self, m=None):
+        """Log transforms the probabilities.
+        
+        Removes values with probability 0.
+
+        Normalizes so that the largest logprob is 0.
+        """
+        if self.log:
+            raise ValueError("Pmf/Hist already under a log transform")
+        self.log = True
+
+        if m is None:
+            m = self.MaxLike()
+
+        for x, p in self.d.items():
+            if p:
+                self.Set(x, math.log(p / m))
+            else:
+                self.Remove(x)
+
+    def Exp(self, m=None):
+        """Exponentiates the probabilities.
+
+        m: how much to shift the ps before exponentiating
+
+        If m is None, normalizes so that the largest prob is 1.
+        """
+        if not self.log:
+            raise ValueError("Pmf/Hist not under a log transform")
+        self.log = False
+
+        if m is None:
+            m = self.MaxLike()
+
+        for x, p in self.d.items():
+            self.Set(x, math.exp(p - m))
+
+    def GetDict(self):
+        """Gets the dictionary."""
+        return self.d
+
+    def SetDict(self, d):
+        """Sets the dictionary."""
+        self.d = d
+
+    def Values(self):
+        """Gets an unsorted sequence of values.
+
+        Note: one source of confusion is that the keys of this
+        dictionary are the values of the Hist/Pmf, and the
+        values of the dictionary are frequencies/probabilities.
+        """
+        return self.d.keys()
+
+    def Items(self):
+        """Gets an unsorted sequence of (value, freq/prob) pairs."""
+        return self.d.items()
+
+    def SortedItems(self):
+        """Gets a sorted sequence of (value, freq/prob) pairs.
+
+        It items are unsortable, the result is unsorted.
+        """
+        def isnan(x):
+            try:
+                return math.isnan(x)
+            except TypeError:
+                return False
+
+        if any([isnan(x) for x in self.Values()]):
+            msg = 'Keys contain NaN, may not sort correctly.'
+            logging.warning(msg)
+
+        try:
+            return sorted(self.d.items())
+        except TypeError:
+            return self.d.items()
+
+    def Render(self, **options):
+        """Generates a sequence of points suitable for plotting.
+
+        Note: options are ignored
+
+        Returns:
+            tuple of (sorted value sequence, freq/prob sequence)
+        """
+        return zip(*self.SortedItems())
+
+    def MakeCdf(self, label=None):
+        """Makes a Cdf."""
+        label = label if label is not None else self.label
+        return Cdf(self, label=label)
+
+    def Print(self):
+        """Prints the values and freqs/probs in ascending order."""
+        for val, prob in self.SortedItems():
+            print(val, prob)
+
+    def Set(self, x, y=0):
+        """Sets the freq/prob associated with the value x.
+
+        Args:
+            x: number value
+            y: number freq or prob
+        """
+        self.d[x] = y
+
+    def Incr(self, x, term=1):
+        """Increments the freq/prob associated with the value x.
+
+        Args:
+            x: number value
+            term: how much to increment by
+        """
+        self.d[x] = self.d.get(x, 0) + term
+
+    def Mult(self, x, factor):
+        """Scales the freq/prob associated with the value x.
+
+        Args:
+            x: number value
+            factor: how much to multiply by
+        """
+        self.d[x] = self.d.get(x, 0) * factor
+
+    def Remove(self, x):
+        """Removes a value.
+
+        Throws an exception if the value is not there.
+
+        Args:
+            x: value to remove
+        """
+        del self.d[x]
+
+    def Total(self):
+        """Returns the total of the frequencies/probabilities in the map."""
+        total = sum(self.d.values())
+        return total
+
+    def MaxLike(self):
+        """Returns the largest frequency/probability in the map."""
+        return max(self.d.values())
+
+    def Largest(self, n=10):
+        """Returns the largest n values, with frequency/probability.
+
+        n: number of items to return
+        """
+        return sorted(self.d.items(), reverse=True)[:n]
+
+    def Smallest(self, n=10):
+        """Returns the smallest n values, with frequency/probability.
+
+        n: number of items to return
+        """
+        return sorted(self.d.items(), reverse=False)[:n]
+
+
+class Hist(_DictWrapper):
+    """Represents a histogram, which is a map from values to frequencies.
+
+    Values can be any hashable type; frequencies are integer counters.
+    """
+    def Freq(self, x):
+        """Gets the frequency associated with the value x.
+
+        Args:
+            x: number value
+
+        Returns:
+            int frequency
+        """
+        return self.d.get(x, 0)
+
+    def Freqs(self, xs):
+        """Gets frequencies for a sequence of values."""
+        return [self.Freq(x) for x in xs]
+
+    def IsSubset(self, other):
+        """Checks whether the values in this histogram are a subset of
+        the values in the given histogram."""
+        for val, freq in self.Items():
+            if freq > other.Freq(val):
+                return False
+        return True
+
+    def Subtract(self, other):
+        """Subtracts the values in the given histogram from this histogram."""
+        for val, freq in other.Items():
+            self.Incr(val, -freq)
+
+
+class Pmf(_DictWrapper):
+    """Represents a probability mass function.
+    
+    Values can be any hashable type; probabilities are floating-point.
+    Pmfs are not necessarily normalized.
+    """
+
+    def Prob(self, x, default=0):
+        """Gets the probability associated with the value x.
+
+        Args:
+            x: number value
+            default: value to return if the key is not there
+
+        Returns:
+            float probability
+        """
+        return self.d.get(x, default)
+
+    def Probs(self, xs):
+        """Gets probabilities for a sequence of values."""
+        return [self.Prob(x) for x in xs]
+
+    def Percentile(self, percentage):
+        """Computes a percentile of a given Pmf.
+
+        Note: this is not super efficient.  If you are planning
+        to compute more than a few percentiles, compute the Cdf.
+
+        percentage: float 0-100
+
+        returns: value from the Pmf
+        """
+        p = percentage / 100
+        total = 0
+        for val, prob in sorted(self.Items()):
+            total += prob
+            if total >= p:
+                return val
+
+    def ProbGreater(self, x):
+        """Probability that a sample from this Pmf exceeds x.
+
+        x: number
+
+        returns: float probability
+        """
+        if isinstance(x, _DictWrapper):
+            return PmfProbGreater(self, x)
+        else:
+            t = [prob for (val, prob) in self.d.items() if val > x]
+            return sum(t)
+
+    def ProbLess(self, x):
+        """Probability that a sample from this Pmf is less than x.
+
+        x: number
+
+        returns: float probability
+        """
+        if isinstance(x, _DictWrapper):
+            return PmfProbLess(self, x)
+        else:
+            t = [prob for (val, prob) in self.d.items() if val < x]
+            return sum(t)
+
+    def ProbEqual(self, x):
+        """Probability that a sample from this Pmf is exactly x.
+
+        x: number
+
+        returns: float probability
+        """
+        if isinstance(x, _DictWrapper):
+            return PmfProbEqual(self, x)
+        else:
+            return self[x]
+
+    # NOTE: I've decided to remove the magic comparators because they
+    # have the side-effect of making Pmf sortable, but in fact they
+    # don't support sorting.
+
+    def Normalize(self, fraction=1):
+        """Normalizes this PMF so the sum of all probs is fraction.
+
+        Args:
+            fraction: what the total should be after normalization
+
+        Returns: the total probability before normalizing
+        """
+        if self.log:
+            raise ValueError("Normalize: Pmf is under a log transform")
+
+        total = self.Total()
+        if total == 0:
+            raise ValueError('Normalize: total probability is zero.')
+
+        factor = fraction / total
+        for x in self.d:
+            self.d[x] *= factor
+
+        return total
+
+    def Random(self):
+        """Chooses a random element from this PMF.
+
+        Note: this is not very efficient.  If you plan to call
+        this more than a few times, consider converting to a CDF.
+
+        Returns:
+            float value from the Pmf
+        """
+        target = random.random()
+        total = 0
+        for x, p in self.d.items():
+            total += p
+            if total >= target:
+                return x
+
+        # we shouldn't get here
+        raise ValueError('Random: Pmf might not be normalized.')
+
+    def Sample(self, n):
+        """Generates a random sample from this distribution.
+        
+        n: int length of the sample
+        returns: NumPy array
+        """
+        return self.MakeCdf().Sample(n)
+
+    def Mean(self):
+        """Computes the mean of a PMF.
+
+        Returns:
+            float mean
+        """
+        return sum(p * x for x, p in self.Items())
+
+    def Median(self):
+        """Computes the median of a PMF.
+
+        Returns:
+            float median
+        """
+        return self.MakeCdf().Percentile(50)
+
+    def Var(self, mu=None):
+        """Computes the variance of a PMF.
+
+        mu: the point around which the variance is computed;
+                if omitted, computes the mean
+
+        returns: float variance
+        """
+        if mu is None:
+            mu = self.Mean()
+
+        return sum(p * (x-mu)**2 for x, p in self.Items())
+
+    def Expect(self, func):
+        """Computes the expectation of func(x).
+
+        Returns:
+            expectation
+        """
+        return np.sum(p * func(x) for x, p in self.Items()) 
+
+    def Std(self, mu=None):
+        """Computes the standard deviation of a PMF.
+
+        mu: the point around which the variance is computed;
+                if omitted, computes the mean
+
+        returns: float standard deviation
+        """
+        var = self.Var(mu)
+        return math.sqrt(var)
+
+    def Mode(self):
+        """Returns the value with the highest probability.
+
+        Returns: float probability
+        """
+        _, val = max((prob, val) for val, prob in self.Items())
+        return val
+
+    # The mode of a posterior is the maximum aposteori probability (MAP)
+    MAP = Mode
+
+    # If the distribution contains likelihoods only, the peak is the
+    # maximum likelihood estimator.
+    MaximumLikelihood = Mode
+
+    def CredibleInterval(self, percentage=90):
+        """Computes the central credible interval.
+
+        If percentage=90, computes the 90% CI.
+
+        Args:
+            percentage: float between 0 and 100
+
+        Returns:
+            sequence of two floats, low and high
+        """
+        cdf = self.MakeCdf()
+        return cdf.CredibleInterval(percentage)
+
+    def __add__(self, other):
+        """Computes the Pmf of the sum of values drawn from self and other.
+
+        other: another Pmf or a scalar
+
+        returns: new Pmf
+        """
+        try:
+            return self.AddPmf(other)
+        except AttributeError:
+            return self.AddConstant(other)
+
+    __radd__ = __add__
+
+    def AddPmf(self, other):
+        """Computes the Pmf of the sum of values drawn from self and other.
+
+        other: another Pmf
+
+        returns: new Pmf
+        """
+        pmf = Pmf()
+        for v1, p1 in self.Items():
+            for v2, p2 in other.Items():
+                pmf[v1 + v2] += p1 * p2
+        return pmf
+
+    def AddConstant(self, other):
+        """Computes the Pmf of the sum a constant and values from self.
+
+        other: a number
+
+        returns: new Pmf
+        """
+        if other == 0:
+            return self.Copy()
+
+        pmf = Pmf()
+        for v1, p1 in self.Items():
+            pmf.Set(v1 + other, p1)
+        return pmf
+
+    def __sub__(self, other):
+        """Computes the Pmf of the diff of values drawn from self and other.
+
+        other: another Pmf
+
+        returns: new Pmf
+        """
+        try:
+            return self.SubPmf(other)
+        except AttributeError:
+            return self.AddConstant(-other)
+
+    def SubPmf(self, other):
+        """Computes the Pmf of the diff of values drawn from self and other.
+
+        other: another Pmf
+
+        returns: new Pmf
+        """
+        pmf = Pmf()
+        for v1, p1 in self.Items():
+            for v2, p2 in other.Items():
+                pmf.Incr(v1 - v2, p1 * p2)
+        return pmf
+
+    def __mul__(self, other):
+        """Computes the Pmf of the product of values drawn from self and other.
+
+        other: another Pmf
+
+        returns: new Pmf
+        """
+        try:
+            return self.MulPmf(other)
+        except AttributeError:
+            return self.MulConstant(other)
+
+    def MulPmf(self, other):
+        """Computes the Pmf of the diff of values drawn from self and other.
+
+        other: another Pmf
+
+        returns: new Pmf
+        """
+        pmf = Pmf()
+        for v1, p1 in self.Items():
+            for v2, p2 in other.Items():
+                pmf.Incr(v1 * v2, p1 * p2)
+        return pmf
+
+    def MulConstant(self, other):
+        """Computes the Pmf of the product of a constant and values from self.
+
+        other: a number
+
+        returns: new Pmf
+        """
+        pmf = Pmf()
+        for v1, p1 in self.Items():
+            pmf.Set(v1 * other, p1)
+        return pmf
+
+    def __div__(self, other):
+        """Computes the Pmf of the ratio of values drawn from self and other.
+
+        other: another Pmf
+
+        returns: new Pmf
+        """
+        try:
+            return self.DivPmf(other)
+        except AttributeError:
+            return self.MulConstant(1/other)
+
+    __truediv__ = __div__
+
+    def DivPmf(self, other):
+        """Computes the Pmf of the ratio of values drawn from self and other.
+
+        other: another Pmf
+
+        returns: new Pmf
+        """
+        pmf = Pmf()
+        for v1, p1 in self.Items():
+            for v2, p2 in other.Items():
+                pmf.Incr(v1 / v2, p1 * p2)
+        return pmf
+
+    def Max(self, k):
+        """Computes the CDF of the maximum of k selections from this dist.
+
+        k: int
+
+        returns: new Cdf
+        """
+        cdf = self.MakeCdf()
+        cdf.ps **= k
+        return cdf
+
+
+class Joint(Pmf):
+    """Represents a joint distribution.
+
+    The values are sequences (usually tuples)
+    """
+
+    def Marginal(self, i, label=None):
+        """Gets the marginal distribution of the indicated variable.
+
+        i: index of the variable we want
+
+        Returns: Pmf
+        """
+        pmf = Pmf(label=label)
+        for vs, prob in self.Items():
+            pmf.Incr(vs[i], prob)
+        return pmf
+
+    def Conditional(self, i, j, val, label=None):
+        """Gets the conditional distribution of the indicated variable.
+
+        Distribution of vs[i], conditioned on vs[j] = val.
+
+        i: index of the variable we want
+        j: which variable is conditioned on
+        val: the value the jth variable has to have
+
+        Returns: Pmf
+        """
+        pmf = Pmf(label=label)
+        for vs, prob in self.Items():
+            if vs[j] != val:
+                continue
+            pmf.Incr(vs[i], prob)
+
+        pmf.Normalize()
+        return pmf
+
+    def MaxLikeInterval(self, percentage=90):
+        """Returns the maximum-likelihood credible interval.
+
+        If percentage=90, computes a 90% CI containing the values
+        with the highest likelihoods.
+
+        percentage: float between 0 and 100
+
+        Returns: list of values from the suite
+        """
+        interval = []
+        total = 0
+
+        t = [(prob, val) for val, prob in self.Items()]
+        t.sort(reverse=True)
+
+        for prob, val in t:
+            interval.append(val)
+            total += prob
+            if total >= percentage / 100:
+                break
+
+        return interval
+
+
+def MakeJoint(pmf1, pmf2):
+    """Joint distribution of values from pmf1 and pmf2.
+
+    Assumes that the PMFs represent independent random variables.
+
+    Args:
+        pmf1: Pmf object
+        pmf2: Pmf object
+
+    Returns:
+        Joint pmf of value pairs
+    """
+    joint = Joint()
+    for v1, p1 in pmf1.Items():
+        for v2, p2 in pmf2.Items():
+            joint.Set((v1, v2), p1 * p2)
+    return joint
+
+
+def MakeHistFromList(t, label=None):
+    """Makes a histogram from an unsorted sequence of values.
+
+    Args:
+        t: sequence of numbers
+        label: string label for this histogram
+
+    Returns:
+        Hist object
+    """
+    return Hist(t, label=label)
+
+
+def MakeHistFromDict(d, label=None):
+    """Makes a histogram from a map from values to frequencies.
+
+    Args:
+        d: dictionary that maps values to frequencies
+        label: string label for this histogram
+
+    Returns:
+        Hist object
+    """
+    return Hist(d, label)
+
+
+def MakePmfFromList(t, label=None):
+    """Makes a PMF from an unsorted sequence of values.
+
+    Args:
+        t: sequence of numbers
+        label: string label for this PMF
+
+    Returns:
+        Pmf object
+    """
+    return Pmf(t, label=label)
+
+
+def MakePmfFromDict(d, label=None):
+    """Makes a PMF from a map from values to probabilities.
+
+    Args:
+        d: dictionary that maps values to probabilities
+        label: string label for this PMF
+
+    Returns:
+        Pmf object
+    """
+    return Pmf(d, label=label)
+
+
+def MakePmfFromItems(t, label=None):
+    """Makes a PMF from a sequence of value-probability pairs
+
+    Args:
+        t: sequence of value-probability pairs
+        label: string label for this PMF
+
+    Returns:
+        Pmf object
+    """
+    return Pmf(dict(t), label=label)
+
+
+def MakePmfFromHist(hist, label=None):
+    """Makes a normalized PMF from a Hist object.
+
+    Args:
+        hist: Hist object
+        label: string label
+
+    Returns:
+        Pmf object
+    """
+    if label is None:
+        label = hist.label
+
+    return Pmf(hist, label=label)
+
+
+def MakeMixture(metapmf, label='mix'):
+    """Make a mixture distribution.
+
+    Args:
+      metapmf: Pmf that maps from Pmfs to probs.
+      label: string label for the new Pmf.
+
+    Returns: Pmf object.
+    """
+    mix = Pmf(label=label)
+    for pmf, p1 in metapmf.Items():
+        for x, p2 in pmf.Items():
+            mix[x] += p1 * p2
+    return mix
+
+
+def MakeUniformPmf(low, high, n):
+    """Make a uniform Pmf.
+
+    low: lowest value (inclusive)
+    high: highest value (inclusize)
+    n: number of values
+    """
+    pmf = Pmf()
+    for x in np.linspace(low, high, n):
+        pmf.Set(x, 1)
+    pmf.Normalize()
+    return pmf
+
+
+class Cdf:
+    """Represents a cumulative distribution function.
+
+    Attributes:
+        xs: sequence of values
+        ps: sequence of probabilities
+        label: string used as a graph label.
+    """
+    def __init__(self, obj=None, ps=None, label=None):
+        """Initializes.
+        
+        If ps is provided, obj must be the corresponding list of values.
+
+        obj: Hist, Pmf, Cdf, Pdf, dict, pandas Series, list of pairs
+        ps: list of cumulative probabilities
+        label: string label
+        """
+        self.label = label if label is not None else DEFAULT_LABEL
+
+        if isinstance(obj, (_DictWrapper, Cdf, Pdf)):
+            if not label:
+                self.label = label if label is not None else obj.label
+
+        if obj is None:
+            # caller does not provide obj, make an empty Cdf
+            self.xs = np.asarray([])
+            self.ps = np.asarray([])
+            if ps is not None:
+                logging.warning("Cdf: can't pass ps without also passing xs.")
+            return
+        else:
+            # if the caller provides xs and ps, just store them          
+            if ps is not None:
+                if isinstance(ps, str):
+                    logging.warning("Cdf: ps can't be a string")
+
+                self.xs = np.asarray(obj)
+                self.ps = np.asarray(ps)
+                return
+
+        # caller has provided just obj, not ps
+        if isinstance(obj, Cdf):
+            self.xs = copy.copy(obj.xs)
+            self.ps = copy.copy(obj.ps)
+            return
+
+        if isinstance(obj, _DictWrapper):
+            dw = obj
+        else:
+            dw = Hist(obj)
+
+        if len(dw) == 0:
+            self.xs = np.asarray([])
+            self.ps = np.asarray([])
+            return
+
+        xs, freqs = zip(*sorted(dw.Items()))
+        self.xs = np.asarray(xs)
+        self.ps = np.cumsum(freqs, dtype=np.float)
+        self.ps /= self.ps[-1]
+
+    def __str__(self):
+        cls = self.__class__.__name__
+        if self.label == DEFAULT_LABEL:
+            return '%s(%s, %s)' % (cls, str(self.xs), str(self.ps))
+        else:
+            return self.label
+
+    def __repr__(self):
+        cls = self.__class__.__name__
+        if self.label == DEFAULT_LABEL:
+            return '%s(%s, %s)' % (cls, str(self.xs), str(self.ps))
+        else:
+            return '%s(%s, %s, %s)' % (cls, str(self.xs), str(self.ps), 
+                                       repr(self.label))
+
+    def __len__(self):
+        return len(self.xs)
+
+    def __getitem__(self, x):
+        return self.Prob(x)
+
+    def __setitem__(self):
+        raise UnimplementedMethodException()
+
+    def __delitem__(self):
+        raise UnimplementedMethodException()
+
+    def __eq__(self, other):
+        return np.all(self.xs == other.xs) and np.all(self.ps == other.ps)
+
+    def Print(self):
+        """Prints the values and freqs/probs in ascending order."""
+        for val, prob in zip(self.xs, self.ps):
+            print(val, prob)
+
+    def Copy(self, label=None):
+        """Returns a copy of this Cdf.
+
+        label: string label for the new Cdf
+        """
+        if label is None:
+            label = self.label
+        return Cdf(list(self.xs), list(self.ps), label=label)
+
+    def MakePmf(self, label=None):
+        """Makes a Pmf."""
+        if label is None:
+            label = self.label
+        return Pmf(self, label=label)
+
+    def Items(self):
+        """Returns a sorted sequence of (value, probability) pairs.
+
+        Note: in Python3, returns an iterator.
+        """
+        a = self.ps
+        b = np.roll(a, 1)
+        b[0] = 0
+        return zip(self.xs, a-b)
+
+    def Shift(self, term):
+        """Adds a term to the xs.
+
+        term: how much to add
+        """
+        new = self.Copy()
+        # don't use +=, or else an int array + float yields int array
+        new.xs = new.xs + term
+        return new
+
+    def Scale(self, factor):
+        """Multiplies the xs by a factor.
+
+        factor: what to multiply by
+        """
+        new = self.Copy()
+        # don't use *=, or else an int array * float yields int array
+        new.xs = new.xs * factor
+        return new
+
+    def Prob(self, x):
+        """Returns CDF(x), the probability that corresponds to value x.
+
+        Args:
+            x: number
+
+        Returns:
+            float probability
+        """
+        if x < self.xs[0]:
+            return 0
+        index = bisect.bisect(self.xs, x)
+        p = self.ps[index-1]
+        return p
+
+    def Probs(self, xs):
+        """Gets probabilities for a sequence of values.
+
+        xs: any sequence that can be converted to NumPy array
+
+        returns: NumPy array of cumulative probabilities
+        """
+        xs = np.asarray(xs)
+        index = np.searchsorted(self.xs, xs, side='right')
+        ps = self.ps[index-1]
+        ps[xs < self.xs[0]] = 0
+        return ps
+
+    ProbArray = Probs
+
+    def Value(self, p):
+        """Returns InverseCDF(p), the value that corresponds to probability p.
+
+        Args:
+            p: number in the range [0, 1]
+
+        Returns:
+            number value
+        """
+        if p < 0 or p > 1:
+            raise ValueError('Probability p must be in range [0, 1]')
+
+        index = bisect.bisect_left(self.ps, p)
+        return self.xs[index]
+
+    def Values(self, ps=None):
+        """Returns InverseCDF(p), the value that corresponds to probability p.
+
+        If ps is not provided, returns all values.
+
+        Args:
+            ps: NumPy array of numbers in the range [0, 1]
+
+        Returns:
+            NumPy array of values
+        """
+        if ps is None:
+            return self.xs
+
+        ps = np.asarray(ps)
+        if np.any(ps < 0) or np.any(ps > 1):
+            raise ValueError('Probability p must be in range [0, 1]')
+
+        index = np.searchsorted(self.ps, ps, side='left')
+        return self.xs[index]
+
+    ValueArray = Values
+
+    def Percentile(self, p):
+        """Returns the value that corresponds to percentile p.
+
+        Args:
+            p: number in the range [0, 100]
+
+        Returns:
+            number value
+        """
+        return self.Value(p / 100)
+
+    def Percentiles(self, ps):
+        """Returns the value that corresponds to percentiles ps.
+
+        Args:
+            ps: numbers in the range [0, 100]
+
+        Returns:
+            array of values
+        """
+        ps = np.asarray(ps)
+        return self.Values(ps / 100)
+
+    def PercentileRank(self, x):
+        """Returns the percentile rank of the value x.
+
+        x: potential value in the CDF
+
+        returns: percentile rank in the range 0 to 100
+        """
+        return self.Prob(x) * 100
+
+    def PercentileRanks(self, xs):
+        """Returns the percentile ranks of the values in xs.
+
+        xs: potential value in the CDF
+
+        returns: array of percentile ranks in the range 0 to 100
+        """
+        return self.Probs(x) * 100
+
+    def Random(self):
+        """Chooses a random value from this distribution."""
+        return self.Value(random.random())
+
+    def Sample(self, n):
+        """Generates a random sample from this distribution.
+        
+        n: int length of the sample
+        returns: NumPy array
+        """
+        ps = np.random.random(n)
+        return self.ValueArray(ps)
+
+    def Mean(self):
+        """Computes the mean of a CDF.
+
+        Returns:
+            float mean
+        """
+        old_p = 0
+        total = 0
+        for x, new_p in zip(self.xs, self.ps):
+            p = new_p - old_p
+            total += p * x
+            old_p = new_p
+        return total
+
+    def CredibleInterval(self, percentage=90):
+        """Computes the central credible interval.
+
+        If percentage=90, computes the 90% CI.
+
+        Args:
+            percentage: float between 0 and 100
+
+        Returns:
+            sequence of two floats, low and high
+        """
+        prob = (1 - percentage / 100) / 2
+        interval = self.Value(prob), self.Value(1 - prob)
+        return interval
+
+    ConfidenceInterval = CredibleInterval
+
+    def _Round(self, multiplier=1000):
+        """
+        An entry is added to the cdf only if the percentile differs
+        from the previous value in a significant digit, where the number
+        of significant digits is determined by multiplier.  The
+        default is 1000, which keeps log10(1000) = 3 significant digits.
+        """
+        # TODO(write this method)
+        raise UnimplementedMethodException()
+
+    def Render(self, **options):
+        """Generates a sequence of points suitable for plotting.
+
+        An empirical CDF is a step function; linear interpolation
+        can be misleading.
+
+        Note: options are ignored
+
+        Returns:
+            tuple of (xs, ps)
+        """
+        def interleave(a, b):
+            c = np.empty(a.shape[0] + b.shape[0])
+            c[::2] = a
+            c[1::2] = b
+            return c
+
+        a = np.array(self.xs)
+        xs = interleave(a, a)
+        shift_ps = np.roll(self.ps, 1)
+        shift_ps[0] = 0
+        ps = interleave(shift_ps, self.ps)
+        return xs, ps
+
+    def Max(self, k):
+        """Computes the CDF of the maximum of k selections from this dist.
+
+        k: int
+
+        returns: new Cdf
+        """
+        cdf = self.Copy()
+        cdf.ps **= k
+        return cdf
+
+
+def MakeCdfFromItems(items, label=None):
+    """Makes a cdf from an unsorted sequence of (value, frequency) pairs.
+
+    Args:
+        items: unsorted sequence of (value, frequency) pairs
+        label: string label for this CDF
+
+    Returns:
+        cdf: list of (value, fraction) pairs
+    """
+    return Cdf(dict(items), label=label)
+
+
+def MakeCdfFromDict(d, label=None):
+    """Makes a CDF from a dictionary that maps values to frequencies.
+
+    Args:
+       d: dictionary that maps values to frequencies.
+       label: string label for the data.
+
+    Returns:
+        Cdf object
+    """
+    return Cdf(d, label=label)
+
+
+def MakeCdfFromList(seq, label=None):
+    """Creates a CDF from an unsorted sequence.
+
+    Args:
+        seq: unsorted sequence of sortable values
+        label: string label for the cdf
+
+    Returns:
+       Cdf object
+    """
+    return Cdf(seq, label=label)
+
+
+def MakeCdfFromHist(hist, label=None):
+    """Makes a CDF from a Hist object.
+
+    Args:
+       hist: Pmf.Hist object
+       label: string label for the data.
+
+    Returns:
+        Cdf object
+    """
+    if label is None:
+        label = hist.label
+
+    return Cdf(hist, label=label)
+
+
+def MakeCdfFromPmf(pmf, label=None):
+    """Makes a CDF from a Pmf object.
+
+    Args:
+       pmf: Pmf.Pmf object
+       label: string label for the data.
+
+    Returns:
+        Cdf object
+    """
+    if label is None:
+        label = pmf.label
+
+    return Cdf(pmf, label=label)
+
+
+class UnimplementedMethodException(Exception):
+    """Exception if someone calls a method that should be overridden."""
+
+
+class Suite(Pmf):
+    """Represents a suite of hypotheses and their probabilities."""
+
+    def Update(self, data):
+        """Updates each hypothesis based on the data.
+
+        data: any representation of the data
+
+        returns: the normalizing constant
+        """
+        for hypo in self.Values():
+            like = self.Likelihood(data, hypo)
+            self.Mult(hypo, like)
+        return self.Normalize()
+
+    def LogUpdate(self, data):
+        """Updates a suite of hypotheses based on new data.
+
+        Modifies the suite directly; if you want to keep the original, make
+        a copy.
+
+        Note: unlike Update, LogUpdate does not normalize.
+
+        Args:
+            data: any representation of the data
+        """
+        for hypo in self.Values():
+            like = self.LogLikelihood(data, hypo)
+            self.Incr(hypo, like)
+
+    def UpdateSet(self, dataset):
+        """Updates each hypothesis based on the dataset.
+
+        This is more efficient than calling Update repeatedly because
+        it waits until the end to Normalize.
+
+        Modifies the suite directly; if you want to keep the original, make
+        a copy.
+
+        dataset: a sequence of data
+
+        returns: the normalizing constant
+        """
+        for data in dataset:
+            for hypo in self.Values():
+                like = self.Likelihood(data, hypo)
+                self.Mult(hypo, like)
+        return self.Normalize()
+
+    def LogUpdateSet(self, dataset):
+        """Updates each hypothesis based on the dataset.
+
+        Modifies the suite directly; if you want to keep the original, make
+        a copy.
+
+        dataset: a sequence of data
+
+        returns: None
+        """
+        for data in dataset:
+            self.LogUpdate(data)
+
+    def Likelihood(self, data, hypo):
+        """Computes the likelihood of the data under the hypothesis.
+
+        hypo: some representation of the hypothesis
+        data: some representation of the data
+        """
+        raise UnimplementedMethodException()
+
+    def LogLikelihood(self, data, hypo):
+        """Computes the log likelihood of the data under the hypothesis.
+
+        hypo: some representation of the hypothesis
+        data: some representation of the data
+        """
+        raise UnimplementedMethodException()
+
+    def Print(self):
+        """Prints the hypotheses and their probabilities."""
+        for hypo, prob in sorted(self.Items()):
+            print(hypo, prob)
+
+    def MakeOdds(self):
+        """Transforms from probabilities to odds.
+
+        Values with prob=0 are removed.
+        """
+        for hypo, prob in self.Items():
+            if prob:
+                self.Set(hypo, Odds(prob))
+            else:
+                self.Remove(hypo)
+
+    def MakeProbs(self):
+        """Transforms from odds to probabilities."""
+        for hypo, odds in self.Items():
+            self.Set(hypo, Probability(odds))
+
+
+def MakeSuiteFromList(t, label=None):
+    """Makes a suite from an unsorted sequence of values.
+
+    Args:
+        t: sequence of numbers
+        label: string label for this suite
+
+    Returns:
+        Suite object
+    """
+    hist = MakeHistFromList(t, label=label)
+    d = hist.GetDict()
+    return MakeSuiteFromDict(d)
+
+
+def MakeSuiteFromHist(hist, label=None):
+    """Makes a normalized suite from a Hist object.
+
+    Args:
+        hist: Hist object
+        label: string label
+
+    Returns:
+        Suite object
+    """
+    if label is None:
+        label = hist.label
+
+    # make a copy of the dictionary
+    d = dict(hist.GetDict())
+    return MakeSuiteFromDict(d, label)
+
+
+def MakeSuiteFromDict(d, label=None):
+    """Makes a suite from a map from values to probabilities.
+
+    Args:
+        d: dictionary that maps values to probabilities
+        label: string label for this suite
+
+    Returns:
+        Suite object
+    """
+    suite = Suite(label=label)
+    suite.SetDict(d)
+    suite.Normalize()
+    return suite
+
+
+class Pdf(object):
+    """Represents a probability density function (PDF)."""
+
+    def Density(self, x):
+        """Evaluates this Pdf at x.
+
+        Returns: float or NumPy array of probability density
+        """
+        raise UnimplementedMethodException()
+
+    def GetLinspace(self):
+        """Get a linspace for plotting.
+
+        Not all subclasses of Pdf implement this.
+
+        Returns: numpy array
+        """
+        raise UnimplementedMethodException()
+
+    def MakePmf(self, **options):
+        """Makes a discrete version of this Pdf.
+
+        options can include
+        label: string
+        low: low end of range
+        high: high end of range
+        n: number of places to evaluate
+
+        Returns: new Pmf
+        """
+        label = options.pop('label', '')
+        xs, ds = self.Render(**options)
+        return Pmf(dict(zip(xs, ds)), label=label)
+
+    def Render(self, **options):
+        """Generates a sequence of points suitable for plotting.
+
+        If options includes low and high, it must also include n;
+        in that case the density is evaluated an n locations between
+        low and high, including both.
+
+        If options includes xs, the density is evaluate at those location.
+
+        Otherwise, self.GetLinspace is invoked to provide the locations.
+
+        Returns:
+            tuple of (xs, densities)
+        """
+        low, high = options.pop('low', None), options.pop('high', None)
+        if low is not None and high is not None:
+            n = options.pop('n', 101)
+            xs = np.linspace(low, high, n)
+        else:
+            xs = options.pop('xs', None)
+            if xs is None:
+                xs = self.GetLinspace()
+            
+        ds = self.Density(xs)
+        return xs, ds
+
+    def Items(self):
+        """Generates a sequence of (value, probability) pairs.
+        """
+        return zip(*self.Render())
+
+
+class NormalPdf(Pdf):
+    """Represents the PDF of a Normal distribution."""
+
+    def __init__(self, mu=0, sigma=1, label=None):
+        """Constructs a Normal Pdf with given mu and sigma.
+
+        mu: mean
+        sigma: standard deviation
+        label: string
+        """
+        self.mu = mu
+        self.sigma = sigma
+        self.label = label if label is not None else '_nolegend_'
+
+    def __str__(self):
+        return 'NormalPdf(%f, %f)' % (self.mu, self.sigma)
+
+    def GetLinspace(self):
+        """Get a linspace for plotting.
+
+        Returns: numpy array
+        """
+        low, high = self.mu-3*self.sigma, self.mu+3*self.sigma
+        return np.linspace(low, high, 101)
+
+    def Density(self, xs):
+        """Evaluates this Pdf at xs.
+
+        xs: scalar or sequence of floats
+
+        returns: float or NumPy array of probability density
+        """
+        return stats.norm.pdf(xs, self.mu, self.sigma)
+
+
+class ExponentialPdf(Pdf):
+    """Represents the PDF of an exponential distribution."""
+
+    def __init__(self, lam=1, label=None):
+        """Constructs an exponential Pdf with given parameter.
+
+        lam: rate parameter
+        label: string
+        """
+        self.lam = lam
+        self.label = label if label is not None else '_nolegend_'
+
+    def __str__(self):
+        return 'ExponentialPdf(%f)' % (self.lam)
+
+    def GetLinspace(self):
+        """Get a linspace for plotting.
+
+        Returns: numpy array
+        """
+        low, high = 0, 5.0/self.lam
+        return np.linspace(low, high, 101)
+
+    def Density(self, xs):
+        """Evaluates this Pdf at xs.
+
+        xs: scalar or sequence of floats
+
+        returns: float or NumPy array of probability density
+        """
+        return stats.expon.pdf(xs, scale=1.0/self.lam)
+
+
+class EstimatedPdf(Pdf):
+    """Represents a PDF estimated by KDE."""
+
+    def __init__(self, sample, label=None):
+        """Estimates the density function based on a sample.
+
+        sample: sequence of data
+        label: string
+        """
+        self.label = label if label is not None else '_nolegend_'
+        self.kde = stats.gaussian_kde(sample)
+        low = min(sample)
+        high = max(sample)
+        self.linspace = np.linspace(low, high, 101)
+
+    def __str__(self):
+        return 'EstimatedPdf(label=%s)' % str(self.label)
+
+    def GetLinspace(self):
+        """Get a linspace for plotting.
+
+        Returns: numpy array
+        """
+        return self.linspace
+
+    def Density(self, xs):
+        """Evaluates this Pdf at xs.
+
+        returns: float or NumPy array of probability density
+        """
+        return self.kde.evaluate(xs)
+
+    def Sample(self, n):
+        """Generates a random sample from the estimated Pdf.
+
+        n: size of sample
+        """
+        # NOTE: we have to flatten because resample returns a 2-D
+        # array for some reason.
+        return self.kde.resample(n).flatten()
+
+
+def CredibleInterval(pmf, percentage=90):
+    """Computes a credible interval for a given distribution.
+
+    If percentage=90, computes the 90% CI.
+
+    Args:
+        pmf: Pmf object representing a posterior distribution
+        percentage: float between 0 and 100
+
+    Returns:
+        sequence of two floats, low and high
+    """
+    cdf = pmf.MakeCdf()
+    prob = (1 - percentage / 100) / 2
+    interval = cdf.Value(prob), cdf.Value(1 - prob)
+    return interval
+
+
+def PmfProbLess(pmf1, pmf2):
+    """Probability that a value from pmf1 is less than a value from pmf2.
+
+    Args:
+        pmf1: Pmf object
+        pmf2: Pmf object
+
+    Returns:
+        float probability
+    """
+    total = 0
+    for v1, p1 in pmf1.Items():
+        for v2, p2 in pmf2.Items():
+            if v1 < v2:
+                total += p1 * p2
+    return total
+
+
+def PmfProbGreater(pmf1, pmf2):
+    """Probability that a value from pmf1 is less than a value from pmf2.
+
+    Args:
+        pmf1: Pmf object
+        pmf2: Pmf object
+
+    Returns:
+        float probability
+    """
+    total = 0
+    for v1, p1 in pmf1.Items():
+        for v2, p2 in pmf2.Items():
+            if v1 > v2:
+                total += p1 * p2
+    return total
+
+
+def PmfProbEqual(pmf1, pmf2):
+    """Probability that a value from pmf1 equals a value from pmf2.
+
+    Args:
+        pmf1: Pmf object
+        pmf2: Pmf object
+
+    Returns:
+        float probability
+    """
+    total = 0
+    for v1, p1 in pmf1.Items():
+        for v2, p2 in pmf2.Items():
+            if v1 == v2:
+                total += p1 * p2
+    return total
+
+
+def RandomSum(dists):
+    """Chooses a random value from each dist and returns the sum.
+
+    dists: sequence of Pmf or Cdf objects
+
+    returns: numerical sum
+    """
+    total = sum(dist.Random() for dist in dists)
+    return total
+
+
+def SampleSum(dists, n):
+    """Draws a sample of sums from a list of distributions.
+
+    dists: sequence of Pmf or Cdf objects
+    n: sample size
+
+    returns: new Pmf of sums
+    """
+    pmf = Pmf(RandomSum(dists) for i in range(n))
+    return pmf
+
+
+def EvalNormalPdf(x, mu, sigma):
+    """Computes the unnormalized PDF of the normal distribution.
+
+    x: value
+    mu: mean
+    sigma: standard deviation
+    
+    returns: float probability density
+    """
+    return stats.norm.pdf(x, mu, sigma)
+
+
+def MakeNormalPmf(mu, sigma, num_sigmas, n=201):
+    """Makes a PMF discrete approx to a Normal distribution.
+    
+    mu: float mean
+    sigma: float standard deviation
+    num_sigmas: how many sigmas to extend in each direction
+    n: number of values in the Pmf
+
+    returns: normalized Pmf
+    """
+    pmf = Pmf()
+    low = mu - num_sigmas * sigma
+    high = mu + num_sigmas * sigma
+
+    for x in np.linspace(low, high, n):
+        p = EvalNormalPdf(x, mu, sigma)
+        pmf.Set(x, p)
+    pmf.Normalize()
+    return pmf
+
+
+def EvalBinomialPmf(k, n, p):
+    """Evaluates the binomial PMF.
+
+    Returns the probabily of k successes in n trials with probability p.
+    """
+    return stats.binom.pmf(k, n, p)
+    
+
+def MakeBinomialPmf(n, p):
+    """Evaluates the binomial PMF.
+
+    Returns the distribution of successes in n trials with probability p.
+    """
+    pmf = Pmf()
+    for k in range(n+1):
+        pmf[k] = stats.binom.pmf(k, n, p)
+    return pmf
+
+
+def EvalGammaPdf(x, a):
+    """Computes the Gamma PDF.
+
+    x: where to evaluate the PDF
+    a: parameter of the gamma distribution
+
+    returns: float probability
+    """
+    return x**(a-1) * np.exp(-x) / gamma(a)
+
+
+def MakeGammaPmf(xs, a):
+    """Makes a PMF discrete approx to a Gamma distribution.
+
+    lam: parameter lambda in events per unit time
+    xs: upper bound of the Pmf
+
+    returns: normalized Pmf
+    """
+    xs = np.asarray(xs)
+    ps = EvalGammaPdf(xs, a)
+    pmf = Pmf(dict(zip(xs, ps)))
+    pmf.Normalize()
+    return pmf
+
+
+def EvalGeometricPmf(k, p, loc=0):
+    """Evaluates the geometric PMF.
+
+    With loc=0: Probability of `k` trials to get one success.
+    With loc=-1: Probability of `k` trials before first success.
+
+    k: number of trials
+    p: probability of success on each trial
+    """
+    return stats.geom.pmf(k, p, loc=loc)
+    
+
+def MakeGeometricPmf(p, loc=0, high=10):
+    """Evaluates the binomial PMF.
+
+    With loc=0: PMF of trials to get one success.
+    With loc=-1: PMF of trials before first success.
+
+    p: probability of success
+    high: upper bound where PMF is truncated
+    """
+    pmf = Pmf()
+    for k in range(high):
+        pmf[k] = stats.geom.pmf(k, p, loc=loc)
+    pmf.Normalize()
+    return pmf
+
+
+def EvalHypergeomPmf(k, N, K, n):
+    """Evaluates the hypergeometric PMF.
+
+    Returns the probabily of k successes in n trials from a population
+    N with K successes in it.
+    """
+    return stats.hypergeom.pmf(k, N, K, n)
+    
+
+def EvalPoissonPmf(k, lam):
+    """Computes the Poisson PMF.
+
+    k: number of events
+    lam: parameter lambda in events per unit time
+
+    returns: float probability
+    """
+    return stats.poisson.pmf(k, lam)
+
+
+def MakePoissonPmf(lam, high, step=1):
+    """Makes a PMF discrete approx to a Poisson distribution.
+
+    lam: parameter lambda in events per unit time
+    high: upper bound of the Pmf
+
+    returns: normalized Pmf
+    """
+    pmf = Pmf()
+    for k in range(0, high + 1, step):
+        p = stats.poisson.pmf(k, lam)
+        pmf.Set(k, p)
+    pmf.Normalize()
+    return pmf
+
+
+def EvalExponentialPdf(x, lam):
+    """Computes the exponential PDF.
+
+    x: value
+    lam: parameter lambda in events per unit time
+
+    returns: float probability density
+    """
+    return lam * math.exp(-lam * x)
+
+
+def EvalExponentialCdf(x, lam):
+    """Evaluates CDF of the exponential distribution with parameter lam."""
+    return 1 - math.exp(-lam * x)
+
+
+def MakeExponentialPmf(lam, high, n=200):
+    """Makes a PMF discrete approx to an exponential distribution.
+
+    lam: parameter lambda in events per unit time
+    high: upper bound
+    n: number of values in the Pmf
+
+    returns: normalized Pmf
+    """
+    pmf = Pmf()
+    for x in np.linspace(0, high, n):
+        p = EvalExponentialPdf(x, lam)
+        pmf.Set(x, p)
+    pmf.Normalize()
+    return pmf
+
+
+def EvalWeibullPdf(x, lam, k):
+    """Computes the Weibull PDF.
+
+    x: value
+    lam: parameter lambda in events per unit time
+    k: parameter
+
+    returns: float probability density
+    """
+    arg = (x / lam)
+    return k / lam * arg**(k-1) * np.exp(-arg**k)
+
+
+def EvalWeibullCdf(x, lam, k):
+    """Evaluates CDF of the Weibull distribution."""
+    arg = (x / lam)
+    return 1 - np.exp(-arg**k)
+
+
+def MakeWeibullPmf(lam, k, high, n=200):
+    """Makes a PMF discrete approx to a Weibull distribution.
+
+    lam: parameter lambda in events per unit time
+    k: parameter
+    high: upper bound
+    n: number of values in the Pmf
+
+    returns: normalized Pmf
+    """
+    xs = np.linspace(0, high, n)
+    ps = EvalWeibullPdf(xs, lam, k)
+    ps[np.isinf(ps)] = 0
+    return Pmf(dict(zip(xs, ps)))
+
+
+def EvalParetoPdf(x, xm, alpha):
+    """Computes the Pareto.
+
+    xm: minimum value (scale parameter)
+    alpha: shape parameter
+
+    returns: float probability density
+    """
+    return stats.pareto.pdf(x, alpha, scale=xm)
+
+
+def MakeParetoPmf(xm, alpha, high, num=101):
+    """Makes a PMF discrete approx to a Pareto distribution.
+
+    xm: minimum value (scale parameter)
+    alpha: shape parameter
+    high: upper bound value
+    num: number of values
+
+    returns: normalized Pmf
+    """
+    xs = np.linspace(xm, high, num)
+    ps = stats.pareto.pdf(xs, alpha, scale=xm)
+    pmf = Pmf(dict(zip(xs, ps)))
+    return pmf
+
+def StandardNormalCdf(x):
+    """Evaluates the CDF of the standard Normal distribution.
+    
+    See http://en.wikipedia.org/wiki/Normal_distribution
+    #Cumulative_distribution_function
+
+    Args:
+        x: float
+                
+    Returns:
+        float
+    """
+    return (math.erf(x / ROOT2) + 1) / 2
+
+
+def EvalNormalCdf(x, mu=0, sigma=1):
+    """Evaluates the CDF of the normal distribution.
+    
+    Args:
+        x: float
+
+        mu: mean parameter
+        
+        sigma: standard deviation parameter
+                
+    Returns:
+        float
+    """
+    return stats.norm.cdf(x, loc=mu, scale=sigma)
+
+
+def EvalNormalCdfInverse(p, mu=0, sigma=1):
+    """Evaluates the inverse CDF of the normal distribution.
+
+    See http://en.wikipedia.org/wiki/Normal_distribution#Quantile_function  
+
+    Args:
+        p: float
+
+        mu: mean parameter
+        
+        sigma: standard deviation parameter
+                
+    Returns:
+        float
+    """
+    return stats.norm.ppf(p, loc=mu, scale=sigma)
+
+
+def EvalLognormalCdf(x, mu=0, sigma=1):
+    """Evaluates the CDF of the lognormal distribution.
+    
+    x: float or sequence
+    mu: mean parameter
+    sigma: standard deviation parameter
+                
+    Returns: float or sequence
+    """
+    return stats.lognorm.cdf(x, loc=mu, scale=sigma)
+
+
+def RenderExpoCdf(lam, low, high, n=101):
+    """Generates sequences of xs and ps for an exponential CDF.
+
+    lam: parameter
+    low: float
+    high: float
+    n: number of points to render
+
+    returns: numpy arrays (xs, ps)
+    """
+    xs = np.linspace(low, high, n)
+    ps = 1 - np.exp(-lam * xs)
+    #ps = stats.expon.cdf(xs, scale=1.0/lam)
+    return xs, ps
+
+
+def RenderNormalCdf(mu, sigma, low, high, n=101):
+    """Generates sequences of xs and ps for a Normal CDF.
+
+    mu: parameter
+    sigma: parameter
+    low: float
+    high: float
+    n: number of points to render
+
+    returns: numpy arrays (xs, ps)
+    """
+    xs = np.linspace(low, high, n)
+    ps = stats.norm.cdf(xs, mu, sigma)
+    return xs, ps
+
+
+def RenderParetoCdf(xmin, alpha, low, high, n=50):
+    """Generates sequences of xs and ps for a Pareto CDF.
+
+    xmin: parameter
+    alpha: parameter
+    low: float
+    high: float
+    n: number of points to render
+
+    returns: numpy arrays (xs, ps)
+    """
+    if low < xmin:
+        low = xmin
+    xs = np.linspace(low, high, n)
+    ps = 1 - (xs / xmin) ** -alpha
+    #ps = stats.pareto.cdf(xs, scale=xmin, b=alpha)
+    return xs, ps
+
+
+class Beta:
+    """Represents a Beta distribution.
+
+    See http://en.wikipedia.org/wiki/Beta_distribution
+    """
+    def __init__(self, alpha=1, beta=1, label=None):
+        """Initializes a Beta distribution."""
+        self.alpha = alpha
+        self.beta = beta
+        self.label = label if label is not None else '_nolegend_'
+
+    def Update(self, data):
+        """Updates a Beta distribution.
+
+        data: pair of int (heads, tails)
+        """
+        heads, tails = data
+        self.alpha += heads
+        self.beta += tails
+
+    def Mean(self):
+        """Computes the mean of this distribution."""
+        return self.alpha / (self.alpha + self.beta)
+
+    def MAP(self):
+        """Computes the value with maximum a posteori probability."""
+        a = self.alpha - 1
+        b = self.beta - 1
+        return a / (a + b)
+
+    def Random(self):
+        """Generates a random variate from this distribution."""
+        return random.betavariate(self.alpha, self.beta)
+
+    def Sample(self, n):
+        """Generates a random sample from this distribution.
+
+        n: int sample size
+        """
+        size = n,
+        return np.random.beta(self.alpha, self.beta, size)
+
+    def EvalPdf(self, x):
+        """Evaluates the PDF at x."""
+        return x ** (self.alpha - 1) * (1 - x) ** (self.beta - 1)
+
+    def MakePmf(self, steps=101, label=None):
+        """Returns a Pmf of this distribution.
+
+        Note: Normally, we just evaluate the PDF at a sequence
+        of points and treat the probability density as a probability
+        mass.
+
+        But if alpha or beta is less than one, we have to be
+        more careful because the PDF goes to infinity at x=0
+        and x=1.  In that case we evaluate the CDF and compute
+        differences.
+
+        The result is a little funny, because the values at 0 and 1
+        are not symmetric.  Nevertheless, it is a reasonable discrete
+        model of the continuous distribution, and behaves well as
+        the number of values increases.
+        """
+        if label is None and self.label is not None:
+            label = self.label
+
+        if self.alpha < 1 or self.beta < 1:
+            cdf = self.MakeCdf()
+            pmf = cdf.MakePmf()
+            return pmf
+
+        xs = [i / (steps - 1.0) for i in range(steps)]
+        probs = [self.EvalPdf(x) for x in xs]
+        pmf = Pmf(dict(zip(xs, probs)), label=label)
+        return pmf
+
+    def MakeCdf(self, steps=101):
+        """Returns the CDF of this distribution."""
+        xs = [i / (steps - 1.0) for i in range(steps)]
+        ps = special.betainc(self.alpha, self.beta, xs)
+        cdf = Cdf(xs, ps)
+        return cdf
+
+    def Percentile(self, ps):
+        """Returns the given percentiles from this distribution.
+
+        ps: scalar, array, or list of [0-100]
+        """
+        ps = np.asarray(ps) / 100
+        xs = special.betaincinv(self.alpha, self.beta, ps)
+        return xs
+
+
+class Dirichlet(object):
+    """Represents a Dirichlet distribution.
+
+    See http://en.wikipedia.org/wiki/Dirichlet_distribution
+    """
+
+    def __init__(self, n, conc=1, label=None):
+        """Initializes a Dirichlet distribution.
+
+        n: number of dimensions
+        conc: concentration parameter (smaller yields more concentration)
+        label: string label
+        """
+        if n < 2:
+            raise ValueError('A Dirichlet distribution with '
+                             'n<2 makes no sense')
+
+        self.n = n
+        self.params = np.ones(n, dtype=np.float) * conc
+        self.label = label if label is not None else '_nolegend_'
+
+    def Update(self, data):
+        """Updates a Dirichlet distribution.
+
+        data: sequence of observations, in order corresponding to params
+        """
+        m = len(data)
+        self.params[:m] += data
+
+    def Random(self):
+        """Generates a random variate from this distribution.
+
+        Returns: normalized vector of fractions
+        """
+        p = np.random.gamma(self.params)
+        return p / p.sum()
+
+    def Likelihood(self, data):
+        """Computes the likelihood of the data.
+
+        Selects a random vector of probabilities from this distribution.
+
+        Returns: float probability
+        """
+        m = len(data)
+        if self.n < m:
+            return 0
+
+        x = data
+        p = self.Random()
+        q = p[:m] ** x
+        return q.prod()
+
+    def LogLikelihood(self, data):
+        """Computes the log likelihood of the data.
+
+        Selects a random vector of probabilities from this distribution.
+
+        Returns: float log probability
+        """
+        m = len(data)
+        if self.n < m:
+            return float('-inf')
+
+        x = self.Random()
+        y = np.log(x[:m]) * data
+        return y.sum()
+
+    def MarginalBeta(self, i):
+        """Computes the marginal distribution of the ith element.
+
+        See http://en.wikipedia.org/wiki/Dirichlet_distribution
+        #Marginal_distributions
+
+        i: int
+
+        Returns: Beta object
+        """
+        alpha0 = self.params.sum()
+        alpha = self.params[i]
+        return Beta(alpha, alpha0 - alpha)
+
+    def PredictivePmf(self, xs, label=None):
+        """Makes a predictive distribution.
+
+        xs: values to go into the Pmf
+
+        Returns: Pmf that maps from x to the mean prevalence of x
+        """
+        alpha0 = self.params.sum()
+        ps = self.params / alpha0
+        return Pmf(zip(xs, ps), label=label)
+
+
+def BinomialCoef(n, k):
+    """Compute the binomial coefficient "n choose k".
+
+    n: number of trials
+    k: number of successes
+
+    Returns: float
+    """
+    return scipy.misc.comb(n, k)
+
+
+def LogBinomialCoef(n, k):
+    """Computes the log of the binomial coefficient.
+
+    http://math.stackexchange.com/questions/64716/
+    approximating-the-logarithm-of-the-binomial-coefficient
+
+    n: number of trials
+    k: number of successes
+
+    Returns: float
+    """
+    return n * math.log(n) - k * math.log(k) - (n - k) * math.log(n - k)
+
+
+def NormalProbability(ys, jitter=0):
+    """Generates data for a normal probability plot.
+
+    ys: sequence of values
+    jitter: float magnitude of jitter added to the ys 
+
+    returns: numpy arrays xs, ys
+    """
+    n = len(ys)
+    xs = np.random.normal(0, 1, n)
+    xs.sort()
+    
+    if jitter:
+        ys = Jitter(ys, jitter)
+    else:
+        ys = np.array(ys)
+    ys.sort()
+
+    return xs, ys
+
+
+def Jitter(values, jitter=0.5):
+    """Jitters the values by adding a uniform variate in (-jitter, jitter).
+
+    values: sequence
+    jitter: scalar magnitude of jitter
+    
+    returns: new numpy array
+    """
+    n = len(values)
+    return np.random.normal(0, jitter, n) + values
+
+
+def NormalProbabilityPlot(sample, fit_color='0.8', **options):
+    """Makes a normal probability plot with a fitted line.
+
+    sample: sequence of numbers
+    fit_color: color string for the fitted line
+    options: passed along to Plot
+    """
+    xs, ys = NormalProbability(sample)
+    mean, var = MeanVar(sample)
+    std = math.sqrt(var)
+
+    fit = FitLine(xs, mean, std)
+    thinkplot.Plot(*fit, color=fit_color, label='model')
+
+    xs, ys = NormalProbability(sample)
+    thinkplot.Plot(xs, ys, **options)
+
+ 
+def Mean(xs):
+    """Computes mean.
+
+    xs: sequence of values
+
+    returns: float mean
+    """
+    return np.mean(xs)
+
+
+def Var(xs, mu=None, ddof=0):
+    """Computes variance.
+
+    xs: sequence of values
+    mu: option known mean
+    ddof: delta degrees of freedom
+
+    returns: float
+    """
+    xs = np.asarray(xs)
+
+    if mu is None:
+        mu = xs.mean()
+
+    ds = xs - mu
+    return np.dot(ds, ds) / (len(xs) - ddof)
+
+
+def Std(xs, mu=None, ddof=0):
+    """Computes standard deviation.
+
+    xs: sequence of values
+    mu: option known mean
+    ddof: delta degrees of freedom
+
+    returns: float
+    """
+    var = Var(xs, mu, ddof)
+    return math.sqrt(var)
+
+
+def MeanVar(xs, ddof=0):
+    """Computes mean and variance.
+
+    Based on http://stackoverflow.com/questions/19391149/
+    numpy-mean-and-variance-from-single-function
+
+    xs: sequence of values
+    ddof: delta degrees of freedom
+    
+    returns: pair of float, mean and var
+    """
+    xs = np.asarray(xs)
+    mean = xs.mean()
+    s2 = Var(xs, mean, ddof)
+    return mean, s2
+
+
+def Trim(t, p=0.01):
+    """Trims the largest and smallest elements of t.
+
+    Args:
+        t: sequence of numbers
+        p: fraction of values to trim off each end
+
+    Returns:
+        sequence of values
+    """
+    n = int(p * len(t))
+    t = sorted(t)[n:-n]
+    return t
+
+
+def TrimmedMean(t, p=0.01):
+    """Computes the trimmed mean of a sequence of numbers.
+
+    Args:
+        t: sequence of numbers
+        p: fraction of values to trim off each end
+
+    Returns:
+        float
+    """
+    t = Trim(t, p)
+    return Mean(t)
+
+
+def TrimmedMeanVar(t, p=0.01):
+    """Computes the trimmed mean and variance of a sequence of numbers.
+
+    Side effect: sorts the list.
+
+    Args:
+        t: sequence of numbers
+        p: fraction of values to trim off each end
+
+    Returns:
+        float
+    """
+    t = Trim(t, p)
+    mu, var = MeanVar(t)
+    return mu, var
+
+
+def CohenEffectSize(group1, group2):
+    """Compute Cohen's d.
+
+    group1: Series or NumPy array
+    group2: Series or NumPy array
+
+    returns: float
+    """
+    diff = group1.mean() - group2.mean()
+
+    n1, n2 = len(group1), len(group2)
+    var1 = group1.var()
+    var2 = group2.var()
+
+    pooled_var = (n1 * var1 + n2 * var2) / (n1 + n2)
+    d = diff / math.sqrt(pooled_var)
+    return d
+
+
+def Cov(xs, ys, meanx=None, meany=None):
+    """Computes Cov(X, Y).
+
+    Args:
+        xs: sequence of values
+        ys: sequence of values
+        meanx: optional float mean of xs
+        meany: optional float mean of ys
+
+    Returns:
+        Cov(X, Y)
+    """
+    xs = np.asarray(xs)
+    ys = np.asarray(ys)
+
+    if meanx is None:
+        meanx = np.mean(xs)
+    if meany is None:
+        meany = np.mean(ys)
+
+    cov = np.dot(xs-meanx, ys-meany) / len(xs)
+    return cov
+
+
+def Corr(xs, ys):
+    """Computes Corr(X, Y).
+
+    Args:
+        xs: sequence of values
+        ys: sequence of values
+
+    Returns:
+        Corr(X, Y)
+    """
+    xs = np.asarray(xs)
+    ys = np.asarray(ys)
+
+    meanx, varx = MeanVar(xs)
+    meany, vary = MeanVar(ys)
+
+    corr = Cov(xs, ys, meanx, meany) / math.sqrt(varx * vary)
+
+    return corr
+
+
+def SerialCorr(series, lag=1):
+    """Computes the serial correlation of a series.
+
+    series: Series
+    lag: integer number of intervals to shift
+
+    returns: float correlation
+    """
+    xs = series[lag:]
+    ys = series.shift(lag)[lag:]
+    corr = Corr(xs, ys)
+    return corr
+
+
+def SpearmanCorr(xs, ys):
+    """Computes Spearman's rank correlation.
+
+    Args:
+        xs: sequence of values
+        ys: sequence of values
+
+    Returns:
+        float Spearman's correlation
+    """
+    xranks = pandas.Series(xs).rank()
+    yranks = pandas.Series(ys).rank()
+    return Corr(xranks, yranks)
+
+
+def MapToRanks(t):
+    """Returns a list of ranks corresponding to the elements in t.
+
+    Args:
+        t: sequence of numbers
+    
+    Returns:
+        list of integer ranks, starting at 1
+    """
+    # pair up each value with its index
+    pairs = enumerate(t)
+    
+    # sort by value
+    sorted_pairs = sorted(pairs, key=itemgetter(1))
+
+    # pair up each pair with its rank
+    ranked = enumerate(sorted_pairs)
+
+    # sort by index
+    resorted = sorted(ranked, key=lambda trip: trip[1][0])
+
+    # extract the ranks
+    ranks = [trip[0]+1 for trip in resorted]
+    return ranks
+
+
+def LeastSquares(xs, ys):
+    """Computes a linear least squares fit for ys as a function of xs.
+
+    Args:
+        xs: sequence of values
+        ys: sequence of values
+
+    Returns:
+        tuple of (intercept, slope)
+    """
+    meanx, varx = MeanVar(xs)
+    meany = Mean(ys)
+
+    slope = Cov(xs, ys, meanx, meany) / varx
+    inter = meany - slope * meanx
+
+    return inter, slope
+
+
+def FitLine(xs, inter, slope):
+    """Fits a line to the given data.
+
+    xs: sequence of x
+
+    returns: tuple of numpy arrays (sorted xs, fit ys)
+    """
+    fit_xs = np.sort(xs)
+    fit_ys = inter + slope * fit_xs
+    return fit_xs, fit_ys
+
+
+def Residuals(xs, ys, inter, slope):
+    """Computes residuals for a linear fit with parameters inter and slope.
+
+    Args:
+        xs: independent variable
+        ys: dependent variable
+        inter: float intercept
+        slope: float slope
+
+    Returns:
+        list of residuals
+    """
+    xs = np.asarray(xs)
+    ys = np.asarray(ys)
+    res = ys - (inter + slope * xs)
+    return res
+
+
+def CoefDetermination(ys, res):
+    """Computes the coefficient of determination (R^2) for given residuals.
+
+    Args:
+        ys: dependent variable
+        res: residuals
+        
+    Returns:
+        float coefficient of determination
+    """
+    return 1 - Var(res) / Var(ys)
+
+
+def CorrelatedGenerator(rho):
+    """Generates standard normal variates with serial correlation.
+
+    rho: target coefficient of correlation
+
+    Returns: iterable
+    """
+    x = random.gauss(0, 1)
+    yield x
+
+    sigma = math.sqrt(1 - rho**2)
+    while True:
+        x = random.gauss(x * rho, sigma)
+        yield x
+
+
+def CorrelatedNormalGenerator(mu, sigma, rho):
+    """Generates normal variates with serial correlation.
+
+    mu: mean of variate
+    sigma: standard deviation of variate
+    rho: target coefficient of correlation
+
+    Returns: iterable
+    """
+    for x in CorrelatedGenerator(rho):
+        yield x * sigma + mu
+
+
+def RawMoment(xs, k):
+    """Computes the kth raw moment of xs.
+    """
+    return sum(x**k for x in xs) / len(xs)
+
+
+def CentralMoment(xs, k):
+    """Computes the kth central moment of xs.
+    """
+    mean = RawMoment(xs, 1)
+    return sum((x - mean)**k for x in xs) / len(xs)
+
+
+def StandardizedMoment(xs, k):
+    """Computes the kth standardized moment of xs.
+    """
+    var = CentralMoment(xs, 2)
+    std = math.sqrt(var)
+    return CentralMoment(xs, k) / std**k
+
+
+def Skewness(xs):
+    """Computes skewness.
+    """
+    return StandardizedMoment(xs, 3)
+
+
+def Median(xs):
+    """Computes the median (50th percentile) of a sequence.
+
+    xs: sequence or anything else that can initialize a Cdf
+
+    returns: float
+    """
+    cdf = Cdf(xs)
+    return cdf.Value(0.5)
+
+
+def IQR(xs):
+    """Computes the interquartile of a sequence.
+
+    xs: sequence or anything else that can initialize a Cdf
+
+    returns: pair of floats
+    """
+    cdf = Cdf(xs)
+    return cdf.Value(0.25), cdf.Value(0.75)
+
+
+def PearsonMedianSkewness(xs):
+    """Computes the Pearson median skewness.
+    """
+    median = Median(xs)
+    mean = RawMoment(xs, 1)
+    var = CentralMoment(xs, 2)
+    std = math.sqrt(var)
+    gp = 3 * (mean - median) / std
+    return gp
+
+
+class FixedWidthVariables(object):
+    """Represents a set of variables in a fixed width file."""
+
+    def __init__(self, variables, index_base=0):
+        """Initializes.
+
+        variables: DataFrame
+        index_base: are the indices 0 or 1 based?
+
+        Attributes:
+        colspecs: list of (start, end) index tuples
+        names: list of string variable names
+        """
+        self.variables = variables
+
+        # note: by default, subtract 1 from colspecs
+        self.colspecs = variables[['start', 'end']] - index_base
+
+        # convert colspecs to a list of pair of int
+        self.colspecs = self.colspecs.astype(np.int).values.tolist()
+        self.names = variables['name']
+
+    def ReadFixedWidth(self, filename, **options):
+        """Reads a fixed width ASCII file.
+
+        filename: string filename
+
+        returns: DataFrame
+        """
+        df = pandas.read_fwf(filename,
+                             colspecs=self.colspecs, 
+                             names=self.names,
+                             **options)
+        return df
+
+
+def ReadStataDct(dct_file, **options):
+    """Reads a Stata dictionary file.
+
+    dct_file: string filename
+    options: dict of options passed to open()
+
+    returns: FixedWidthVariables object
+    """
+    type_map = dict(byte=int, int=int, long=int, float=float, 
+                    double=float, numeric=float)
+
+    var_info = []
+    with open(dct_file, **options) as f:
+        for line in f:
+            match = re.search( r'_column\(([^)]*)\)', line)
+            if not match:
+                continue
+            start = int(match.group(1))
+            t = line.split()
+            vtype, name, fstring = t[1:4]
+            name = name.lower()
+            if vtype.startswith('str'):
+                vtype = str
+            else:
+                vtype = type_map[vtype]
+            long_desc = ' '.join(t[4:]).strip('"')
+            var_info.append((start, vtype, name, fstring, long_desc))
+            
+    columns = ['start', 'type', 'name', 'fstring', 'desc']
+    variables = pandas.DataFrame(var_info, columns=columns)
+
+    # fill in the end column by shifting the start column
+    variables['end'] = variables.start.shift(-1)
+    variables.loc[len(variables)-1, 'end'] = 0
+
+    dct = FixedWidthVariables(variables, index_base=1)
+    return dct
+
+
+def Resample(xs, n=None):
+    """Draw a sample from xs with the same length as xs.
+
+    xs: sequence
+    n: sample size (default: len(xs))
+
+    returns: NumPy array
+    """
+    if n is None:
+        n = len(xs)
+    return np.random.choice(xs, n, replace=True)
+
+
+def SampleRows(df, nrows, replace=False):
+    """Choose a sample of rows from a DataFrame.
+
+    df: DataFrame
+    nrows: number of rows
+    replace: whether to sample with replacement
+
+    returns: DataDf
+    """
+    indices = np.random.choice(df.index, nrows, replace=replace)
+    sample = df.loc[indices]
+    return sample
+
+
+def ResampleRows(df):
+    """Resamples rows from a DataFrame.
+
+    df: DataFrame
+
+    returns: DataFrame
+    """
+    return SampleRows(df, len(df), replace=True)
+
+
+def ResampleRowsWeighted(df, column='finalwgt'):
+    """Resamples a DataFrame using probabilities proportional to given column.
+
+    df: DataFrame
+    column: string column name to use as weights
+
+    returns: DataFrame
+    """
+    weights = df[column].copy()
+    weights /= sum(weights)
+    indices = np.random.choice(df.index, len(df), replace=True, p=weights)
+    sample = df.loc[indices]
+    return sample
+
+
+def PercentileRow(array, p):
+    """Selects the row from a sorted array that maps to percentile p.
+
+    p: float 0--100
+
+    returns: NumPy array (one row)
+    """
+    rows, cols = array.shape
+    index = int(rows * p / 100)
+    return array[index,]
+
+
+def PercentileRows(ys_seq, percents):
+    """Given a collection of lines, selects percentiles along vertical axis.
+
+    For example, if ys_seq contains simulation results like ys as a
+    function of time, and percents contains (5, 95), the result would
+    be a 90% CI for each vertical slice of the simulation results.
+
+    ys_seq: sequence of lines (y values)
+    percents: list of percentiles (0-100) to select
+
+    returns: list of NumPy arrays, one for each percentile
+    """
+    nrows = len(ys_seq)
+    ncols = len(ys_seq[0])
+    array = np.zeros((nrows, ncols))
+
+    for i, ys in enumerate(ys_seq):
+        array[i,] = ys
+
+    array = np.sort(array, axis=0)
+
+    rows = [PercentileRow(array, p) for p in percents]
+    return rows
+
+
+def Smooth(xs, sigma=2, **options):
+    """Smooths a NumPy array with a Gaussian filter.
+
+    xs: sequence
+    sigma: standard deviation of the filter
+    """
+    return ndimage.filters.gaussian_filter1d(xs, sigma, **options)
+
+
+class HypothesisTest(object):
+    """Represents a hypothesis test."""
+
+    def __init__(self, data):
+        """Initializes.
+
+        data: data in whatever form is relevant
+        """
+        self.data = data
+        self.MakeModel()
+        self.actual = self.TestStatistic(data)
+        self.test_stats = None
+        self.test_cdf = None
+
+    def PValue(self, iters=1000):
+        """Computes the distribution of the test statistic and p-value.
+
+        iters: number of iterations
+
+        returns: float p-value
+        """
+        self.test_stats = [self.TestStatistic(self.RunModel()) 
+                           for _ in range(iters)]
+        self.test_cdf = Cdf(self.test_stats)
+
+        count = sum(1 for x in self.test_stats if x >= self.actual)
+        return count / iters
+
+    def MaxTestStat(self):
+        """Returns the largest test statistic seen during simulations.
+        """
+        return max(self.test_stats)
+
+    def PlotCdf(self, label=None):
+        """Draws a Cdf with vertical lines at the observed test stat.
+        """
+        def VertLine(x):
+            """Draws a vertical line at x."""
+            thinkplot.Plot([x, x], [0, 1], color='0.8')
+
+        VertLine(self.actual)
+        thinkplot.Cdf(self.test_cdf, label=label)
+
+    def TestStatistic(self, data):
+        """Computes the test statistic.
+
+        data: data in whatever form is relevant        
+        """
+        raise UnimplementedMethodException()
+
+    def MakeModel(self):
+        """Build a model of the null hypothesis.
+        """
+        pass
+
+    def RunModel(self):
+        """Run the model of the null hypothesis.
+
+        returns: simulated data
+        """
+        raise UnimplementedMethodException()
+
+
+def main():
+    pass
+    
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file