Source code for pyqmc.dmc

import os
import numpy as np
import pyqmc.mc as mc
import sys
import h5py


[docs]def limdrift(g, tau, acyrus=0.25):
    """
    Use Cyrus Umrigar's algorithm to limit the drift near nodes.

    :parameter g: a [nconf,ndim] vector
    :parameter tau: time step
    :parameter acyrus: the maximum magnitude
    :returns: The vector with the cut off applied and multiplied by tau.
    """
    tot = np.linalg.norm(g, axis=1) * acyrus
    mask = tot > 1e-8
    taueff = np.ones(tot.shape) * tau
    taueff[mask] = (np.sqrt(1 + 2 * tau * tot[mask]) - 1) / tot[mask]
    return g * taueff[:, np.newaxis]


def get_V2(configs, wf, acc_out):
    if "grad2" in acc_out.keys():
        return acc_out["grad2"]

    nconfig, nelec = configs.configs.shape[0:2]
    v2 = np.zeros(nconfig)
    for e in range(nelec):
        v2 += np.sum(np.abs(wf.gradient(e, configs.electron(e))).T ** 2, axis=1)
    return v2


def propose_drift_diffusion(wf, configs, tstep, e):
    nconfig = configs.configs.shape[0]
    grad = limdrift(np.real(wf.gradient(e, configs.electron(e)).T), tstep)
    gauss = np.random.normal(scale=np.sqrt(tstep), size=(nconfig, 3))
    eposnew = configs.configs[:, e, :] + gauss + grad
    newepos = configs.make_irreducible(e, eposnew)

    # Compute reverse move
    new_grad = limdrift(np.real(wf.gradient(e, newepos).T), tstep)
    forward = np.sum(gauss ** 2, axis=1)
    backward = np.sum((gauss + grad + new_grad) ** 2, axis=1)
    t_prob = np.exp(1 / (2 * tstep) * (forward - backward))

    # Acceptance -- fixed-node: reject if wf changes sign
    wfratio = wf.testvalue(e, newepos)
    ratio = np.abs(wfratio) ** 2 * t_prob
    if not wf.iscomplex:
        ratio *= np.sign(wfratio)
    accept = ratio > np.random.rand(nconfig)
    r2 = np.sum((gauss + grad) ** 2, axis=1)

    return newepos, accept, r2


[docs]def propose_tmoves(wf, configs, energy_accumulator, tstep, e):
    """
    No side effect calculation of t-moves

    Returns:
       new proposed positions
       probability of acceptance
       sum of weights
    """
    moves = energy_accumulator.nonlocal_tmoves(configs, wf, e, tstep)
    t_amplitudes = moves["ratio"] * moves["weight"]
    # print(moves['ratio'])
    forward_probability = np.zeros_like(t_amplitudes)
    forward_probability[t_amplitudes > 0] = t_amplitudes[t_amplitudes > 0]
    norm = 1.0 + np.sum(forward_probability, axis=1)  # EQN 34

    def select_walker(array):
        r = np.random.rand()
        return np.searchsorted(array, r)

    cdf = np.cumsum(forward_probability / norm[:, np.newaxis], axis=1)
    selected_moves = np.apply_along_axis(select_walker, 1, cdf)
    move_selected = selected_moves < t_amplitudes.shape[1]

    selected_moves[move_selected == False] = 0
    newpos = np.zeros((norm.shape[0], 3))
    reverse_ratio = np.zeros((norm.shape[0]))
    for walker, move in enumerate(selected_moves):
        newpos[walker, :] = moves["configs"].configs[walker, move, :]
        reverse_ratio[walker] = moves["ratio"][walker, move]

    newpos = configs.make_irreducible(e, newpos)

    backward_amplitude = t_amplitudes / reverse_ratio[:, np.newaxis]
    backward_amplitude[backward_amplitude < 0] = 0.0
    back_norm = 1.0 + np.sum(backward_amplitude, axis=1)
    acceptance = norm / back_norm

    return newpos, move_selected, acceptance, np.sum(t_amplitudes)


[docs]def dmc_propagate(
    wf,
    configs,
    weights,
    tstep,
    branchcut_start,
    e_trial,
    e_est,
    nsteps=5,
    accumulators=None,
    ekey=("energy", "total"),
):
    """
    Propagate DMC without branching

    :parameter wf: A Wave function-like class. recompute(), gradient(), and updateinternals() are used, as well as anything (such as laplacian() ) used by accumulators
    :parameter configs: Configs object, (nconfig, nelec, 3) - initial coordinates to start calculation.
    :parameter weights: (nconfig,) - initial weights to start calculation
    :parameter tstep: Time step for move proposals. Introduces time step error.
    :parameter nsteps: number of DMC steps to take
    :parameter accumulators: A dictionary of functor objects that take in (coords,wf) and return a dictionary of quantities to be averaged. np.mean(quantity,axis=0) should give the average over configurations. If none, a default energy accumulator will be used.
    :parameter ekey: tuple of strings; energy is needed for DMC weights. Access total energy by accumulators[ekey[0]](configs, wf)[ekey[1]
    :returns: (df,coords,weights)
      df: A list of dictionaries nstep long that contains all results from the accumulators.

      coords: The final coordinates from this calculation.

      weights: The final weights from this calculation

    """
    assert accumulators is not None, "Need an energy accumulator for DMC"
    nconfig, nelec = configs.configs.shape[0:2]
    wf.recompute(configs)

    energy_acc = accumulators[ekey[0]](configs, wf)
    eloc = energy_acc[ekey[1]].real
    v2 = get_V2(configs, wf, energy_acc)
    df = []

    for _ in range(nsteps):
        r2_accepted = np.zeros(nconfig)
        r2_proposed = np.zeros(nconfig)
        prob_acceptance = np.zeros(nconfig)
        tmove_acceptance = np.zeros(nconfig)

        if accumulators[ekey[0]].has_nonlocal_moves():
            for e in range(nelec):  # T-moves
                newepos, mask, probability, ecp_totweight = propose_tmoves(
                    wf, configs, accumulators[ekey[0]], tstep, e
                )
                accept = mask & (probability > np.random.rand(nconfig))
                configs.move(e, newepos, accept)
                wf.updateinternals(e, newepos, mask=accept)
                tmove_acceptance += accept / nelec

        for e in range(nelec):  # drift-diffusion
            newepos, accept, r2 = propose_drift_diffusion(wf, configs, tstep, e)
            configs.move(e, newepos, accept)
            wf.updateinternals(e, newepos, mask=accept)
            r2_proposed += r2
            r2_accepted[accept] += r2[accept]
            prob_acceptance += accept / nelec

        # weights
        elocold = eloc.copy()
        v2old = v2.copy()
        energydat = accumulators[ekey[0]](configs, wf)
        eloc = energydat[ekey[1]].real

        tdamp = r2_accepted / r2_proposed
        v2 = get_V2(configs, wf, energydat)

        Snew = compute_S(e_trial, e_est, branchcut_start, v2, tstep, eloc, nelec)
        Sold = compute_S(e_trial, e_est, branchcut_start, v2old, tstep, elocold, nelec)
        wmult = np.exp(tstep * tdamp * (0.5 * Snew + 0.5 * Sold))
        weights *= wmult
        wavg = np.mean(weights)

        avg = {}
        for k, accumulator in accumulators.items():
            dat = accumulator(configs, wf) if k != ekey[0] else energydat
            for m, res in dat.items():
                avg[k + m] = np.einsum("...i,i...->...", weights, res) / (
                    nconfig * wavg
                )
        avg["weight"] = wavg
        avg["acceptance"] = np.mean(prob_acceptance)
        avg["tmove_acceptance"] = np.mean(tmove_acceptance)
        df.append(avg)
    weight = np.asarray([d["weight"] for d in df])
    avg_weight = weight / np.mean(weight)
    df_ret = {
        k: np.mean([d[k] * w for d, w in zip(df, avg_weight)], axis=0)
        for k in df[0].keys()
    }

    df_ret["weight"] = np.mean(weight)

    return df_ret, configs, weights


def compute_S(e_trial, e_est, branchcut, v2, tau, eloc, nelec):
    e_cut = e_est - eloc
    mask = np.abs(e_cut) > branchcut
    e_cut[mask] = branchcut * np.sign(e_cut[mask])
    denominator = 1 + (v2 * tau / nelec) ** 2

    return e_trial - e_est + e_cut / denominator


[docs]def dmc_propagate_parallel(wf, configs, weights, client, npartitions, *args, **kwargs):
    r"""Parallelizes calls to dmc_propagate by splitting configs

    If npartitions does not evenly divide nconfigs, we need to reweight the results based on the number of configs per parallel task.

    The final result should be equivalent to the non-parallelized case.
    The average weight :math:`w` and the weighted average of observables :math:`\langle O \rangle` are returned.
    Index :math:`i` refers to walker index.

    .. math::
        w = \sum_i w_i / n_{\rm config}
        \qquad\quad \langle O \rangle = \sum_i o_{i}  w_i / \sum_i w_i

    Split over parallel tasks, we need to reweight by number of walkers.
    The average weight :math:`w_p` and weighted average of observables :math:`\langle O\rangle_p` are returned from each task.

    .. math::
        w_p = \sum_j^{{\rm task}\, p} w_j / n_{{\rm config}, p}
        \qquad\quad \langle O \rangle_p = \frac{\sum_j^{{\rm task}\, p} o_{j}  w_j }{ \sum_j^{{\rm task}\, p} w_j }


    The total weight and total average (defined above) are computed from the task weights :math:`w_p` and task averages :math:`\langle O\rangle_p` as

    .. math::
        w = \sum_p w_p n_{{\rm config}, p} /  n_{\rm config},
        \qquad\quad \langle O \rangle = \frac{ \sum_p \langle O\rangle_p  \sum_j^{{\rm task}\, p} w_j }{ \sum_i w_i}.

    We can rewrite the weights using the equations above

    .. math::
        \langle O \rangle &= \frac{ \sum_p \langle O\rangle_p w_p  n_{{\rm config}, p}  }{ w n_{\rm config} }

        &= \sum_p \langle O\rangle_p \frac{w_p n_{{\rm config}, p}}{\sum_p w_p n_{{\rm config}, p}}


    By reweighting the task weights as :math:`\overline{w}_p = w_p n_{{\rm config}, p}`, we can omit the reweighting factor :math:`\frac{n_{{\rm config}, p}}{n_{\rm config}}` (that we use to collect parallel vmc).
    Instead, we use only the reweighting factor :math:`\overline{w}_p / \sum_p \overline{w}_p`

    .. math:: \langle O \rangle = \sum_p \langle O\rangle_p \frac{\overline{w}_p }{\sum_p \overline{w}_p }
    """

    config = configs.split(npartitions)
    weight = np.array_split(weights, npartitions)
    runs = [
        client.submit(dmc_propagate, wf, conf, wt, *args, **kwargs)
        for conf, wt in zip(config, weight)
    ]
    allresults = list(zip(*[r.result() for r in runs]))
    configs.join(allresults[1])
    weights = np.concatenate(allresults[2])
    confweight = np.array([len(c.configs) for c in config], dtype=float)
    weight = np.array([w["weight"] for w in allresults[0]]) * confweight
    weight_avg = weight / np.sum(weight)
    block_avg = {
        k: np.sum(
            [res[k] * ww for res, ww in zip(allresults[0], weight_avg)],
            axis=0,
        )
        for k in allresults[0][0].keys()
    }
    block_avg["weight"] = np.mean(weight)
    return block_avg, configs, weights


[docs]def branch(configs, weights):
    """
    Perform branching on a set of walkers using the 'stochastic comb'

    Walkers are resampled with probability proportional to the weights, and the new weights are all set to be equal to the average weight.

    :parameter configs: (nconfig,nelec,3) walker coordinates
    :parameter weights: (nconfig,) walker weights
    :returns: resampled walker configurations and weights all equal to average weight
    """

    nconfig = configs.configs.shape[0]
    probability = np.cumsum(weights)
    wtot = probability[-1]
    base = np.random.rand()
    newinds = np.searchsorted(
        probability, (base + np.linspace(0, wtot, nconfig)) % wtot
    )
    configs.resample(newinds)
    weights.fill(wtot / nconfig)
    return configs, weights


def dmc_file(hdf_file, data, attr, configs, weights):
    import pyqmc.hdftools as hdftools

    if hdf_file is not None:
        with h5py.File(hdf_file, "a") as hdf:
            if "configs" not in hdf.keys():
                hdftools.setup_hdf(hdf, data, attr)
                configs.initialize_hdf(hdf)
            if "weights" not in hdf.keys():
                hdf.create_dataset("weights", weights.shape)
            hdftools.append_hdf(hdf, data)
            configs.to_hdf(hdf)
            hdf["weights"][:] = weights


[docs]def rundmc(
    wf,
    configs,
    weights=None,
    tstep=0.01,
    nsteps=1000,
    branchtime=5,
    stepoffset=0,
    branchcut_start=10,
    verbose=False,
    accumulators=None,
    ekey=("energy", "total"),
    feedback=1.0,
    hdf_file=None,
    continue_from=None,
    client=None,
    npartitions=None,
    **kwargs,
):
    """
    Run DMC

    :parameter wf: A Wave function-like class. recompute(), gradient(), and updateinternals() are used, as well as anything (such as laplacian() ) used by accumulators
    :parameter configs: (nconfig, nelec, 3) - initial coordinates to start calculation.
    :parameter weights: (nconfig,) - initial weights to start calculation, defaults to uniform.
    :parameter nsteps: number of DMC steps to take
    :parameter tstep: Time step for move proposals. Introduces time step error.
    :parameter branchtime: number of steps to take between branching
    :parameter accumulators: A dictionary of functor objects that take in (coords,wf) and return a dictionary of quantities to be averaged. np.mean(quantity,axis=0) should give the average over configurations. If none, a default energy accumulator will be used.
    :parameter ekey: tuple of strings; energy is needed for DMC weights. Access total energy by accumulators[ekey[0]](configs, wf)[ekey[1]
    :parameter verbose: Print out step information
    :parameter stepoffset: If continuing a run, what to start the step numbering at.
    :returns: (df,coords,weights)
      df: A list of dictionaries nstep long that contains all results from the accumulators.

      coords: The final coordinates from this calculation.

      weights: The final weights from this calculation

    """
    #Don't continue onto a file that's already there.
    if continue_from is not None and hdf_file is not None and os.path.isfile(hdf_file):
        raise RuntimeError(f"continue_from is set but hdf_file={hdf_file} already exists! Delete or rename {hdf_file} and try again.")

    # Restart if hdf_file is there
    if continue_from is None and hdf_file is not None and os.path.isfile(hdf_file):
        continue_from = hdf_file

    # Now we should be sure that there is a file 
    # to continue from, if given.
    if continue_from is not None:
        with h5py.File(continue_from, "r") as hdf:
            stepoffset = hdf["step"][-1] + 1
            configs.load_hdf(hdf)
            weights = np.array(hdf["weights"])
            if "e_trial" not in hdf.keys():
                raise ValueError(
                    "Did not find e_trial in the restart file. This may mean that you are trying to restart from a different version of DMC"
                )
            e_trial = hdf["e_trial"][-1]
            e_est = hdf["e_est"][-1]
            esigma = hdf["esigma"][-1]
            if verbose:
                print(f"Restarting calculation {continue_from} from step {stepoffset}")
    else:
        warmup = 2
        df, configs = mc.vmc(
            wf,
            configs,
            accumulators={ekey[0]: accumulators[ekey[0]]},
            client=client,
            npartitions=npartitions,
            verbose=verbose,
        )
        en = df[ekey[0] + ekey[1]][warmup:]
        eref = np.mean(en).real
        e_trial = eref
        e_est = eref
        esigma = np.sqrt(np.var(en) * np.mean(df["nconfig"]))
        if verbose:
            print("eref start", eref, "esigma", esigma)

    nconfig = configs.configs.shape[0]
    if weights is None:
        weights = np.ones(nconfig)

    npropagate = int(np.ceil(nsteps / branchtime))
    df = []
    for step in range(npropagate):
        if client is None:
            df_, configs, weights = dmc_propagate(
                wf,
                configs,
                weights,
                tstep,
                branchcut_start * esigma,
                e_trial=e_trial,
                e_est=e_est,
                nsteps=branchtime,
                accumulators=accumulators,
                ekey=ekey,
                **kwargs,
            )
        else:
            df_, configs, weights = dmc_propagate_parallel(
                wf,
                configs,
                weights,
                client,
                npartitions,
                tstep,
                branchcut_start * esigma,
                e_trial=e_trial,
                e_est=e_est,
                nsteps=branchtime,
                accumulators=accumulators,
                ekey=ekey,
                **kwargs,
            )

        df_["e_trial"] = e_trial
        df_["e_est"] = e_est
        df_["step"] = step + stepoffset
        df_["esigma"] = esigma
        df_["tstep"] = tstep
        df_["weight_std"] = np.std(weights)
        df_["nsteps"] = branchtime

        dmc_file(hdf_file, df_, {}, configs, weights)
        df.append(df_)
        e_est = estimate_energy(hdf_file, df, ekey)
        e_trial = e_est - feedback * np.log(np.mean(weights)).real
        configs, weights = branch(configs, weights)
        if verbose:
            print(
                "energy",
                df_[ekey[0] + ekey[1]],
                "e_trial",
                e_trial,
                "e_est",
                e_est,
                "sigma(w)",
                df_["weight_std"],
            )

    df_ret = {k: np.asarray([d[k] for d in df]) for k in df[0].keys()}
    return df_ret, configs, weights


def estimate_energy(hdf_file, df, ekey):
    if hdf_file is not None:
        with h5py.File(hdf_file, "r") as f:
            en = f[ekey[0] + ekey[1]][()]
            wt = f["weight"][()]
    else:
        en = np.asarray([d[ekey[0] + ekey[1]] for d in df])
        wt = np.asarray([d["weight"] for d in df])
    warmup = int(len(en) / 4)
    return np.average(en[warmup:], weights=wt[warmup:]).real