# Author: Maxwell I. Zimmerman <mizimmer@wustl.edu>
# Contributors:
# Copyright (C) 2017, Washington University in St. Louis
# All rights reserved.
# Unauthorized copying of this file, via any medium, is strictly prohibited
# Proprietary and confidential
#######################################################################
# imports
#######################################################################
import glob
import itertools
import mdtraj as md
import numpy as np
import os
from .base_analysis import base_analysis
from .. import tools
from ..md_gen.gromax import Gromax
from ..submissions.os_sub import SPSub, OSWrap
from multiprocessing import Pool
#######################################################################
# code
#######################################################################
def _get_filenames(msm_dir):
"""Returns pdb filenames"""
pdb_filenames = glob.glob(msm_dir + "/centers_masses/State*.pdb")
pdb_filenames_full = np.array(
[os.path.abspath(filename) for filename in np.sort(pdb_filenames)])
return pdb_filenames_full
def _get_state_nums(pdb_filenames):
"""Determines the unique state numbers from pdb filenames"""
state_nums = np.unique(
np.array(
[
filename.split("State")[-1].split("-")[0]
for filename in pdb_filenames]))
return state_nums
def _minimize_energy(minimize_info):
    """Multiprocessing helper that minimizes one structure in its own
    directory. The pdb is first converted to a gro file with
    `gmx editconf`; the gro file is then handed to the minimization
    wrapper's `run` method.

    `minimize_info` is a (minimize_obj, pdb_filename, output_folder)
    tuple, packed this way so the function can be used with `Pool.map`.
    Nothing is returned."""
    wrapper, pdb_path, work_dir = minimize_info
    # the gro file is named after the pdb, minus directories and the
    # ".pdb" extension, and lives in the structure's own folder
    stem = pdb_path.rsplit("/", 1)[-1].partition(".pdb")[0]
    gro_path = work_dir + "/" + stem + ".gro"
    # first shell command: create the working directory
    make_dir = 'mkdir ' + work_dir
    # the optional gromacs source line has to share a command string
    # with the gromacs call, so it is prepended to it
    if wrapper.source_file is None:
        source_prefix = ''
    else:
        source_prefix = 'source ' + wrapper.source_file + '\n'
    convert = (
        source_prefix + 'gmx editconf -f ' + pdb_path + ' -o ' + gro_path)
    _ = tools.run_commands([make_dir, convert], supress=True)
    # minimize the converted structure inside its directory
    wrapper.run(gro_path, output_dir=work_dir)
    return
def minimize_energies(minimize_obj, pdb_filenames, output_folder, n_cpus):
    """Minimizes a set of pdb files, one worker process per structure.

    Each structure is minimized in `<output_folder>/<state_name>`, where
    the state name is the pdb's base filename up to its first "-".

    Parameters
    ----------
    minimize_obj : object,
        Minimization wrapper. It is passed to every worker, so it must
        be picklable.
    pdb_filenames : list,
        List of pdb filenames to minimize.
    output_folder : str,
        The folder to generate output data.
    n_cpus : int,
        The number of processes to use.
    """
    state_names = [
        filename.split("/")[-1].split("-")[0] for filename in pdb_filenames]
    output_folders = [
        output_folder + "/" + state_name for state_name in state_names]
    minimize_info = list(
        zip(
            itertools.repeat(minimize_obj), pdb_filenames, output_folders))
    # nothing to minimize: avoid spawning worker processes for no work
    if not minimize_info:
        return
    pool = Pool(processes=n_cpus)
    try:
        pool.map(_minimize_energy, minimize_info)
    finally:
        # always release the workers, even if a minimization raised
        pool.terminate()
    return
def _parse_log_for_energy(file_info):
"""Searches file for potential energy"""
filename, _ = file_info
f = open(filename, "r")
f_data = f.readlines()
f.close()
energy = None
for line in f_data:
if line.split()[:3] == ['Potential', 'Energy', '=']:
energy = float(line.split()[-1])
break
return energy
def parse_logs_for_energies(output_dir, n_cpus=1):
    """Searches through output directory for log files and parses them
    for potential energies.

    Parameters
    ----------
    output_dir : str,
        Directory whose immediate sub-directories each hold an `md.log`.
    n_cpus : int,
        The number of processes used for parsing.

    Returns
    ----------
    energies : list,
        One entry per log file found, ordered by sorted log filename.
        An entry is None when no potential energy was found in that log.
    """
    # get log file names
    log_files = np.sort(glob.glob(output_dir + "/*/md.log"))
    file_info = list(zip(log_files, np.arange(len(log_files))))
    # no logs: same empty result, without spawning worker processes
    if not file_info:
        return []
    # parallelize the parsing
    pool = Pool(processes=n_cpus)
    try:
        energies = pool.map(_parse_log_for_energy, file_info)
    finally:
        # always release the workers, even if parsing raised
        pool.terminate()
    return energies
class MinimizeWrap(base_analysis):
    """Analysis wrapper that minimizes structures and records the
    potential energy of each one.

    Parameters
    ----------
    top_file : str,
        Filename of the gromacs topology file to be used with each
        minimization.
    mdp_file : str,
        The gromacs parameter file to be used with minimization.
    n_cpus : int,
        The total number of cpus available for minimization. This is
        NOT per minimization: every minimization uses 1 cpu and the
        minimizations are run in parallel.
    build_full : bool,
        Flag to either minimize all structures or to continue previous
        minimizations.
    **kwargs :
        Forwarded unchanged to the `Gromax` constructor.

    Attributes
    ----------
    msm_dir : str,
        The MSM and adaptive sampling analysis directory.
    output_folder : str,
        The directory within the msm_dir that contains minimizations.
    output_name : str,
        The filename of the final rankings.

    NOTE(review): the three attributes above are read by `run` but are
    not assigned in this class; presumably `base_analysis` provides
    them -- confirm.
    """

    def __init__(
            self, top_file, mdp_file, n_cpus=1, build_full=True, **kwargs):
        self.top_file = top_file
        self.mdp_file = mdp_file
        self.n_cpus = n_cpus
        self.build_full = build_full
        # the gromacs wrapper is always built with a single cpu and no
        # gpus; `n_cpus` only sets how many minimizations run at once
        self.g_obj = Gromax(
            top_file=top_file, mdp_file=mdp_file, n_cpus=1, n_gpus=None,
            submission_obj=SPSub(wait=True), min_run=True, **kwargs)

    @property
    def class_name(self):
        """Name of this analysis class."""
        return "MinimizeWrap"

    @property
    def config(self):
        """Dictionary of the settings this wrapper was built with."""
        settings = {
            'top_file': self.top_file,
            'mdp_file': self.mdp_file,
            'n_cpus': self.n_cpus,
            'build_full': self.build_full,
            'g_obj': self.g_obj
        }
        return settings

    @property
    def analysis_folder(self):
        """Name of the folder used by this analysis."""
        return "gromax_minimize"

    @property
    def base_output_name(self):
        """Base name of the output file of this analysis."""
        return "energy_per_state"

    def run(self):
        """Minimizes the state structures and saves their energies.

        Does nothing when `output_name` already exists. Otherwise either
        all structures (`build_full`) or only the not-yet-processed ones
        are minimized, after which every log in `output_folder` is
        parsed and the energies are written with `np.save`."""
        # the analysis was already done; nothing left to do
        if os.path.exists(self.output_name):
            return
        structures = _get_filenames(self.msm_dir)
        if self.build_full:
            # fresh build: create the output folder, minimize everything
            _ = tools.run_commands(['mkdir ' + self.output_folder])
            pending = structures
        else:
            # continuing: skip as many leading structures as there are
            # existing State* entries in the output folder
            n_done = len(glob.glob(self.output_folder + "/State*"))
            pending = structures[n_done:]
        minimize_energies(
            self.g_obj, pending, self.output_folder, self.n_cpus)
        # energies are parsed from all logs, old and new, then saved
        energies = parse_logs_for_energies(
            self.output_folder, n_cpus=self.n_cpus)
        np.save(self.output_name, energies)