#!/usr/bin/env python
""" This script reconstructs the results presented in Tables 4.x.
"""
import warnings
warnings.filterwarnings("ignore")

from statsmodels.tools.eval_measures import rmse
from multiprocessing import Pool
from functools import partial

import pickle as pkl
import numpy as np
import sys
import os

import respy

from respy.python.simulate.simulate_auxiliary import get_estimation_vector

# module wide variables
# PROJECT_DIR starts out as the directory that contains this file (symlinks resolved) ...
PROJECT_DIR = os.path.dirname(os.path.realpath(__file__))
# ... and is then stripped of the '/recomputation/monte_carlo' substring. This presumably yields
# the project root, assuming the script lives in '<root>/recomputation/monte_carlo'; the
# hard-coded forward slashes make this POSIX-specific.
PROJECT_DIR = PROJECT_DIR.replace('/recomputation/monte_carlo', '')
# Put '<PROJECT_DIR>/_modules' first on the import path. This has to happen before the
# auxiliary_* imports that follow, which presumably live in that directory.
sys.path.insert(0, PROJECT_DIR + '/_modules')

from auxiliary_shared import get_choice_probabilities
from auxiliary_shared import write_bootstrap_sample
from auxiliary_shared import get_optimization_info
from auxiliary_shared import process_command_line
from auxiliary_shared import send_notification
from auxiliary_shared import enter_results_dir
from auxiliary_shared import simulate_samples
from auxiliary_monte import process_monte
from auxiliary_monte import write_monte
from auxiliary_shared import EXACT_DIR
from auxiliary_shared import to_string
from auxiliary_shared import get_seeds
from auxiliary_shared import SPEC_DIR
from auxiliary_shared import mkdir_p
from auxiliary_shared import cleanup


def run(is_debug, maxfun, which, seed):
    """ Run a single request, i.e. one bootstrap iteration for one data specification.

    All work happens inside the subdirectory 'seed_<seed>' of the current working directory. The
    working directory is restored before returning, also if an error occurs, so that a reused
    worker process does not start its next request from inside a stale seed directory.

    Parameters
    ----------
    is_debug : bool
        If true, no extra simulation arguments are passed to simulate_samples(). Otherwise the
        simulations are requested without interpolation and with 100000 draws for the EMAX.
    maxfun : int
        Value stored as the 'maxfun' attribute of the respy object before the estimation.
    which : str
        Label of the specification. Selects the initialization file 'data_<which>.ini' in
        SPEC_DIR and the reference results in EXACT_DIR + '/data_<which>'.
    seed
        Seed of this request. Determines the name of the subdirectory and is handed on to
        write_bootstrap_sample().

    Returns
    -------
    tuple
        (x_iter, x_true, stat, num_evals, num_steps) where x_iter and x_true are the estimated
        and the baseline parameter vectors as lists, stat is the RMSE between the choice
        probabilities of the 'finish' sample and the reference results, and num_evals and
        num_steps are whatever get_optimization_info() reports.
    """
    # Remember where we started so that the directory can be restored unconditionally.
    start_dir = os.getcwd()

    # Prepare and change to subdirectory.
    dir_ = 'seed_' + to_string(seed)
    mkdir_p(dir_)
    os.chdir(dir_)

    try:
        # I need to specify some details for the simulations to come.
        sim_args = None
        if not is_debug:
            sim_args = dict()
            sim_args['is_interpolated'] = False
            sim_args['num_draws_emax'] = 100000

        # Read the baseline specification. Several attributes will be modified later to fit the
        # needs of the bootstrap exercise.
        respy_obj = respy.RespyCls(SPEC_DIR + '/data_' + which + '.ini')

        respy_obj.unlock()
        respy_obj.set_attr('file_est', 'data.respy.dat')
        respy_obj.set_attr('file_sim', 'data.respy.dat')
        respy_obj.set_attr('is_interpolated', True)
        respy_obj.set_attr('num_agents_sim', 1000)
        respy_obj.set_attr('num_agents_est', 100)
        respy_obj.set_attr('maxfun', maxfun)
        respy_obj.lock()

        respy_obj.write_out()

        # The baseline parameters serve as the "truth" the estimates are compared against.
        # NOTE(review): the meaning of the second positional argument is not visible here.
        model_paras = respy_obj.get_attr('model_paras')
        x_true = get_estimation_vector(model_paras, True)

        # I write out a random subsample of 100 individuals for the estimation.
        write_bootstrap_sample(respy_obj, which, seed)

        # Run the actual estimation based on the bootstrap sample and collect the result. The
        # little scratch file provides a signal to the respy package that only the limited
        # interpolation model is to be used.
        if which == 'one':
            with open('.structRecomputation.tmp', 'a'):
                pass

        simulate_samples('start', respy_obj, sim_args)

        x_iter, _ = respy.estimate(respy_obj)
        respy_obj.update_model_paras(x_iter)

        simulate_samples('finish', respy_obj, sim_args)

        # I now determine the RMSE for this particular bootstrap iteration.
        probs_true = get_choice_probabilities(EXACT_DIR + '/data_' + which + '/data.respy.info')
        probs_finish = get_choice_probabilities('finish/finish_sample.info')
        stat = rmse(probs_finish, probs_true)

        # Get some information about the optimization
        num_evals, num_steps = get_optimization_info()

        # Finishing
        return x_iter.tolist(), x_true.tolist(), stat, num_evals, num_steps

    finally:
        # Return to specification directory, regardless of success or failure.
        os.chdir(start_dir)


def aggregate_results(rslts):
    """ Regroup the per-seed results of run() by quantity.

    Parameters
    ----------
    rslts : sequence of tuple
        One (x_iter, x_true, stat, num_evals, num_steps) tuple per seed, as returned by run().

    Returns
    -------
    tuple
        (x_iter, x_true, stat, num_evals, num_steps). All entries but x_true are lists with one
        element per seed. x_true is taken from the last result; it is derived from the baseline
        specification and thus expected to be identical across seeds.

    Raises
    ------
    ValueError
        If rslts is empty, as there is nothing to aggregate then.
    """
    if not rslts:
        raise ValueError('no results to aggregate')

    x_iter = [rslt[0] for rslt in rslts]
    stat = [rslt[2] for rslt in rslts]
    num_evals = [rslt[3] for rslt in rslts]
    num_steps = [rslt[4] for rslt in rslts]
    x_true = rslts[-1][1]

    return x_iter, x_true, stat, num_evals, num_steps


''' Execution of module as script.
'''

if __name__ == '__main__':

    description = 'Assess Monte Carlo performance.'
    is_debug, num_procs = process_command_line(description)

    # Settings of the full exercise: upper bound on function evaluations, number of bootstrap
    # iterations, and the specifications to process. Debugging runs use a minimal setup.
    maxfun, num_boots = 10000, 40
    data = ['one', 'two', 'three']

    if is_debug:
        maxfun, num_boots = 0, 2
        data = ['one']

    # Switch to the results directory. This separates the results from the source files and
    # eases the updating from the compute servers.
    source_dir = enter_results_dir('monte_carlo')
    seeds = get_seeds(num_boots)

    cleanup()

    for i, which in enumerate(data):

        dirname = 'data_' + which
        os.mkdir(dirname)
        os.chdir(dirname)

        # Distribute the seeds across the worker processes. The pool is torn down explicitly
        # as otherwise each specification would leave its worker processes behind.
        process_tasks = partial(run, is_debug, maxfun, which)
        pool = Pool(num_procs)
        try:
            rslts = pool.map(process_tasks, seeds)
        finally:
            pool.terminate()
            pool.join()

        # The averages need to be based on the information from all seeds.
        x_iter, x_true, stat, num_evals, num_steps = aggregate_results(rslts)

        rslt = dict()
        rslt['points'] = process_monte(x_iter, x_true)
        rslt['rmse'] = np.mean(stat)
        rslt['num_evals'] = np.mean(num_evals)
        rslt['num_steps'] = np.mean(num_steps)

        # Store the raw per-seed results alongside the summary.
        with open('rslts.monte.pkl', 'wb') as out_file:
            pkl.dump(rslts, out_file)

        os.chdir('../')

        write_monte(i, rslt)

    send_notification('monte')

    os.chdir(source_dir)
