from .optimizer import Optimizer
import numpy as np
import os
from taurex.mpi import get_rank, barrier
from taurex.util.util import (read_table, read_error_into_dict,
                              quantile_corner,
                              recursively_save_dict_contents_to_output)
from taurex import OutputSize
class MultiNestOptimizer(Optimizer):
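    """
    Optimizer that wraps the (Py)MultiNest nested sampler.

    A minimal usage sketch (``my_observation`` and ``my_model`` are
    hypothetical placeholders for a taurex observation and forward model
    built elsewhere):

        opt = MultiNestOptimizer(multi_nest_path='chains/',
                                 observed=my_observation,
                                 model=my_model)
        opt.compute_fit()
    """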
def __init__(self, multi_nest_path=None, observed=None, model=None,
sampling_efficiency='parameter',
num_live_points=1500,
max_iterations=0,
search_multi_modes=True,
num_params_cluster=None,
maximum_modes=100,
constant_efficiency_mode=False,
evidence_tolerance=0.5,
mode_tolerance=-1e90,
importance_sampling=False,
resume=False,
n_iter_before_update=100,
multinest_prefix='1-',
verbose_output=True, sigma_fraction=0.1):
super().__init__('Multinest', observed, model, sigma_fraction)
# sampling chains directory
self.nest_path = 'chains/'
# sampling efficiency (parameter, ...)
self.sampling_eff = sampling_efficiency
# number of live points
self.n_live_points = int(num_live_points)
# maximum no. of iterations (0=inf)
self.max_iter = int(max_iterations)
# search for multiple modes
self.multimodes = search_multi_modes
self.n_iter_before_update = int(n_iter_before_update)
# parameters on which to cluster, e.g. if nclust_par = 3, it will
# cluster on the first 3 parameters only.
# if ncluster_par = -1 it clusters on all parameters
self.nclust_par = num_params_cluster
# maximum number of modes
self.max_modes = int(maximum_modes)
# run in constant efficiency mode
self.const_eff = constant_efficiency_mode
        # evidence tolerance: sampling stops once the estimated remaining
        # contribution to the evidence falls below this value
self.evidence_tolerance = evidence_tolerance
self.mode_tolerance = mode_tolerance
# importance nested sampling
self.imp_sampling = importance_sampling
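        # MultiNest's importance nested sampling cannot be combined with
        # multimodal separation, so mode search is switched off when INS
        # is requested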
if self.imp_sampling:
self.multimodes = False
self.multinest_prefix = multinest_prefix
self.dir_multinest = multi_nest_path
if get_rank() == 0:
if not os.path.exists(self.dir_multinest):
self.info('Directory %s does not exist, creating',
self.dir_multinest)
os.makedirs(self.dir_multinest)
barrier()
self.info('Found directory %s', self.dir_multinest)
self.resume = resume
self.verbose = verbose_output
    def compute_fit(self):
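        """
        Runs the PyMultiNest sampler against the attached observation and
        forward model, then stores the per-mode solutions.
        """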
import pymultinest
data = self._observed.spectrum
datastd = self._observed.errorBar
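        # The log-likelihood evaluated below is the standard Gaussian one:
        #   ln L = -sum_i ln(sigma_i * sqrt(2*pi)) - 0.5 * chi^2
        # where chi^2 = sum_i ((d_i - m_i) / sigma_i)^2 is computed by
        # self.chisq_trans from the data d, model m and error bars sigma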
        sqrt2pi = np.sqrt(2*np.pi)

        def multinest_loglike(cube, ndim, nparams):
            # log-likelihood function called by multinest
            fit_params_container = np.array(
                [cube[i] for i in range(len(self.fitting_parameters))])
            chi_t = self.chisq_trans(fit_params_container, data, datastd)
            loglike = -np.sum(np.log(datastd*sqrt2pi)) - 0.5 * chi_t
            return loglike
        def multinest_uniform_prior(cube, ndim, nparams):
            # prior transform called by multinest: maps each normalised
            # unit-hypercube sample to its physical prior range
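            # e.g. a uniform prior on [a, b] is the inverse-CDF mapping
            #   cube[idx] -> a + cube[idx]*(b - a)
            # since each entry of `cube` arrives as a sample of the unit
            # hypercube in [0, 1]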
for idx, priors in enumerate(self.fitting_priors):
cube[idx] = priors.sample(cube[idx])
ndim = len(self.fitting_parameters)
        self.warning('Number of dimensions %s', ndim)
        self.warning('Fitting parameters %s', self.fitting_parameters)
ncluster = self.nclust_par
if isinstance(ncluster, float):
ncluster = int(ncluster)
if ncluster is not None and ncluster <= 0:
ncluster = None
if ncluster is None:
self.nclust_par = ndim # For writing to output later on
        self.info('Beginning fit...')
pymultinest.run(LogLikelihood=multinest_loglike,
Prior=multinest_uniform_prior,
n_dims=ndim,
multimodal=self.multimodes,
n_clustering_params=ncluster,
max_modes=self.max_modes,
n_iter_before_update=self.n_iter_before_update,
outputfiles_basename=os.path.join(self.dir_multinest,
self.multinest_prefix),
const_efficiency_mode=self.const_eff,
importance_nested_sampling=self.imp_sampling,
resume=self.resume,
verbose=self.verbose,
sampling_efficiency=self.sampling_eff,
evidence_tolerance=self.evidence_tolerance,
mode_tolerance=self.mode_tolerance,
n_live_points=self.n_live_points,
max_iter=self.max_iter
)
        self.info('Fit complete...')
self._multinest_output = self.store_nest_solutions()
self.debug('Multinest output %s', self._multinest_output)
    def write_optimizer(self, output):
opt = super().write_optimizer(output)
# sampling efficiency (parameter, ...)
        opt.write_string('sampling_eff', self.sampling_eff)
# number of live points
opt.write_scalar('num_live_points', self.n_live_points)
# maximum no. of iterations (0=inf)
opt.write_scalar('max_iterations', self.max_iter)
# search for multiple modes
opt.write_scalar('search_multimodes', self.multimodes)
# parameters on which to cluster, e.g. if nclust_par = 3, it will cluster on the first 3 parameters only.
# if ncluster_par = -1 it clusters on all parameters
opt.write_scalar('nclust_parameter', self.nclust_par)
# maximum number of modes
opt.write_scalar('max_modes', self.max_modes)
# run in constant efficiency mode
opt.write_scalar('constant_efficiency', self.const_eff)
        # evidence tolerance: sampling stops once the estimated remaining
        # contribution to the evidence falls below this value
opt.write_scalar('evidence_tolerance', self.evidence_tolerance)
opt.write_scalar('mode_tolerance', self.mode_tolerance)
# importance nested sampling
        opt.write_scalar('importance_sampling', self.imp_sampling)
return opt
    def write_fit(self, output):
fit = super().write_fit(output)
if self._multinest_output:
recursively_save_dict_contents_to_output(
output, self._multinest_output)
return fit
    # Laziness brought us to this point
    # This function is so big and I cannot be arsed to rewrite it in a
    # nicer way; if some angel does it for me then I will buy them TWO beers.
def store_nest_solutions(self):
import pymultinest
        self.warning('Storing the multinest results')
NEST_out = {'solutions': {}}
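        # The dictionary built below has (roughly) the layout:
        #   {'solutions': {'solution0': {'type', 'local_logE', 'weights',
        #                                'tracedata', 'fit_params'}, ...},
        #    'NEST_stats': <pymultinest Analyzer statistics>,
        #    'global_logE': (log-evidence, log-evidence error)}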
data = np.loadtxt(os.path.join(self.dir_multinest,
'{}.txt'.format(self.multinest_prefix)))
        NEST_analyzer = pymultinest.Analyzer(
            n_params=len(self.fitting_parameters),
            outputfiles_basename=os.path.join(self.dir_multinest,
                                              self.multinest_prefix))
NEST_stats = NEST_analyzer.get_stats()
NEST_out['NEST_stats'] = NEST_stats
        NEST_out['global_logE'] = (
            NEST_out['NEST_stats']['global evidence'],
            NEST_out['NEST_stats']['global evidence error'])
        # Fallback when multimodes = False: pymultinest.Analyzer does not
        # report means and sigmas in that case, so parse the stats file
        # directly
if len(NEST_out['NEST_stats']['modes']) == 0:
with open(os.path.join(self.dir_multinest, '{}stats.dat'.format(self.multinest_prefix))) as file:
lines = file.readlines()
stats = {'modes': []}
read_error_into_dict(lines[0], stats)
text = ''.join(lines[2:])
# without INS:
parts = text.split("\n\n")
mode = {'index': 0}
modelines = parts[0]
t = read_table(modelines, title='Parameter')
mode['mean'] = t[:, 1].tolist()
mode['sigma'] = t[:, 2].tolist()
mode['maximum'] = read_table(parts[1], title='Parameter', d=None)[
:, 1].tolist()
mode['maximum a posterior'] = read_table(
parts[2], title='Parameter', d=None)[:, 1].tolist()
if 'Nested Sampling Global Log-Evidence'.lower() in stats:
mode['Local Log-Evidence'.lower()
] = stats['Nested Sampling Global Log-Evidence'.lower()]
mode['Local Log-Evidence error'.lower()
] = stats['Nested Sampling Global Log-Evidence error'.lower()]
else:
mode['Local Log-Evidence'.lower()
] = stats['Nested Importance Sampling Global Log-Evidence'.lower()]
mode['Local Log-Evidence error'.lower()
] = stats['Nested Importance Sampling Global Log-Evidence error'.lower()]
mode['Strictly Local Log-Evidence'.lower()
] = mode['Local Log-Evidence'.lower()]
mode['Strictly Local Log-Evidence error'.lower()
] = mode['Local Log-Evidence error'.lower()]
NEST_out['NEST_stats']['modes'] = [mode]
modes = []
modes_weights = []
chains = []
chains_weights = []
if self.multimodes:
# separate modes. get individual samples for each mode
# get parameter values and sample probability (=weight) for each mode
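            # post_separate.dat layout: modes are separated by two blank
            # lines and each sample row reads
            #   [weight, -2*loglike, param_1, ..., param_n]
            # so the weight is column 0 and parameters start at column 2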
with open(os.path.join(self.dir_multinest, '{}post_separate.dat'.format(self.multinest_prefix))) as f:
lines = f.readlines()
for idx, line in enumerate(lines):
                if idx > 2:  # skip the leading header/blank lines
if lines[idx-1] == '\n' and lines[idx-2] == '\n':
modes.append(chains)
modes_weights.append(chains_weights)
chains = []
chains_weights = []
chain = [float(x) for x in line.split()[2:]]
if len(chain) > 0:
chains.append(chain)
chains_weights.append(float(line.split()[0]))
modes.append(chains)
modes_weights.append(chains_weights)
modes_array = []
for mode in modes:
mode_array = np.zeros((len(mode), len(mode[0])))
for idx, line in enumerate(mode):
mode_array[idx, :] = line
modes_array.append(mode_array)
else:
            # not running in multimode: read the chains directly from the
            # '{prefix}.txt' posterior file loaded above
modes_array = [data[:, 2:]]
chains_weights = [data[:, 0]]
modes_weights.append(chains_weights[0])
modes = [0]
modes_weights = np.asarray(modes_weights)
for nmode in range(len(modes)):
self.debug('Nmode: {}'.format(nmode))
            mydict = {'type': 'nest',
                      # per-mode local evidence, indexed by nmode to match
                      # the per-mode statistics used below
                      'local_logE': (NEST_out['NEST_stats']['modes'][nmode]['local log-evidence'],
                                     NEST_out['NEST_stats']['modes'][nmode]['local log-evidence error']),
'weights': np.asarray(modes_weights[nmode]),
'tracedata': modes_array[nmode],
'fit_params': {}}
for idx, param_name in enumerate(self.fit_names):
trace = modes_array[nmode][:, idx]
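                # weighted posterior quantiles: the median gives the value,
                # the 16th/84th percentiles its -/+ 1-sigma bounds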
q_16, q_50, q_84 = quantile_corner(trace, [0.16, 0.5, 0.84],
weights=np.asarray(modes_weights[nmode]))
mydict['fit_params'][param_name] = {
'value': q_50,
'sigma_m': q_50-q_16,
'sigma_p': q_84-q_50,
'nest_map': NEST_stats['modes'][nmode]['maximum a posterior'][idx],
'mean': NEST_stats['modes'][nmode]['mean'][idx],
'nest_sigma': NEST_stats['modes'][nmode]['sigma'][idx],
'trace': trace,
}
NEST_out['solutions']['solution{}'.format(nmode)] = mydict
return NEST_out
    def generate_solution(self, output_size=OutputSize.heavy):
solution = super().generate_solution(output_size=output_size)
solution['GlobalStats'] = self._multinest_output['NEST_stats']
return solution
def get_samples(self, solution_idx):
return self._multinest_output['solutions'][f'solution{solution_idx}']['tracedata']
def get_weights(self, solution_idx):
return self._multinest_output['solutions'][f'solution{solution_idx}']['weights']
    def get_solution(self):
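        """
        Generator yielding, for each MultiNest mode, a tuple of
        (solution index, MAP values, median values, extra outputs); the
        extra outputs carry the local log-evidence, fitted parameters,
        trace data and sample weights.
        """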
        names = self.fit_names
        # take independent copies so updating the MAP values does not
        # also overwrite the median values (and vice versa)
        opt_values = list(self.fit_values)
        opt_map = list(self.fit_values)
solutions = [
(k, v) for k, v in self._multinest_output['solutions'].items() if 'solution' in k]
for k, v in solutions:
solution_idx = int(k[8:])
for p_name, p_value in v['fit_params'].items():
idx = names.index(p_name)
opt_map[idx] = p_value['nest_map']
opt_values[idx] = p_value['value']
yield solution_idx, opt_map, opt_values, [
('Statistics', {'local log-evidence': self._multinest_output['NEST_stats']['modes'][solution_idx]['local log-evidence'],
'local log-evidence error': self._multinest_output['NEST_stats']['modes'][solution_idx]['local log-evidence error']}),
('fit_params', v['fit_params']),
('tracedata', v['tracedata']),
('weights', v['weights'])]
    @classmethod
    def input_keywords(cls):
return ['multinest', 'pymultinest', ]
BIBTEX_ENTRIES = [
"""
@article{ refId0,
author = {{Buchner, J.} and {Georgakakis, A.} and {Nandra, K.} and {Hsu, L.} and {Rangel, C.} and {Brightman, M.} and {Merloni, A.} and {Salvato, M.} and {Donley, J.} and {Kocevski, D.}},
title = {X-ray spectral modelling of the AGN obscuring region in the
CDFS: Bayesian model selection and catalogue},
DOI= "10.1051/0004-6361/201322971",
url= "https://doi.org/10.1051/0004-6361/201322971",
journal = {A\&A},
year = 2014,
volume = 564,
pages = "A125",
month = "",
}
""",
"""
@ARTICLE{Feroz_multinest,
author = {{Feroz}, F. and {Hobson}, M.~P. and {Bridges}, M.},
title = "{MULTINEST: an efficient and robust Bayesian inference tool for cosmology and particle physics}",
journal = {MNRAS},
keywords = {methods: data analysis, methods: statistical, Astrophysics},
year = "2009",
month = "Oct",
volume = {398},
number = {4},
pages = {1601-1614},
doi = {10.1111/j.1365-2966.2009.14548.x},
archivePrefix = {arXiv},
eprint = {0809.3437},
primaryClass = {astro-ph},
adsurl = {https://ui.adsabs.harvard.edu/abs/2009MNRAS.398.1601F},
adsnote = {Provided by the SAO/NASA Astrophysics Data System}
}
"""
]