forked from Hendejac/CpHMD-Analysis
-
Notifications
You must be signed in to change notification settings - Fork 0
/
cphmdanalysis.py
730 lines (666 loc) · 35.8 KB
/
cphmdanalysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
"""
The Python script parses and run calculation on the lambda and log files produced by
continuous constant pH molecular dynamics (CpHMD) simulations at independent pH conditions
or with pH replica exchange protocol.
For the latter case, two different classes are used to process the log files
(:class:`log_analysis_charmm`) and (:class:`log_analysis_amber`).
All lambda files are processed with (:class:`lambda_data`).
Plots can be made using (:class:`plotting_lambda_data`) for lambda files and class for
log files.
"""
# Library for CpHMD Analysis
#----------------------------
import numpy as np
from operator import add
import pandas as pd
import re
import glob
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt
from matplotlib import cm
import math
##########################################################
# Author: Jack Henderson #
# Date: 6/14/2021 #
# Version: 1.2 #
##########################################################
##########################################################
# #
# Lambda File Processing #
# #
##########################################################
# Used for both CHARMM and AMBER lambda files #
##########################################################
def HH_fitting(ph, hill, pka):
return 1 / (1 + 10**(hill*(pka - ph)))
def compute_pkas(phs, ph_object, type='Macro'):
'''
This function is used to calculate the pKas for all titratable residues.
phs : A list of ph values
ph_object : A list of 'ph_objects' generated using :class:`lambda_data`
type : The default 'Macro' will calculate the macro pKa for all residues the output for each
residue will be the a list of [pKa, Hill].
If set to 'Micro' the micro pKas for all residues with tautomeric states will
be calculated and the output will be a 2D list [[res#, pKa1, hill1],[res#, pKa2, hill2]].
'''
data = []
number_of_sites = 0
if type is 'Macro':
number_of_sites = len(ph_object[0].s)
if type is 'Micro':
number_of_sites = len(ph_object[0].s_x1)
for i in range(number_of_sites):
if type is 'Macro':
s_values = [an_object.s[i] for an_object in ph_object]
diffs = [abs(s_value-0.5) for s_value in s_values]
min_diff = min(diffs)
if min_diff < 0.4:
initial_params = [1, phs[diffs.index(min_diff)]]
try:
fit = curve_fit(HH_fitting, phs, s_values, p0=initial_params)
hill = fit[0][0]
pka = fit[0][1]
data.append([pka, hill])
except:
data.append(['NaN', 'NaN'])
else:
data.append(['NaN', 'NaN'])
if type is 'Micro':
data_sub = []
s_values = [[an_object.s_x1[i] for an_object in ph_object],
[an_object.s_x2[i] for an_object in ph_object]]
diffs = [[abs(s_value[1]-0.5) for s_value in s_values[0]],
[abs(s_value[1]-0.5) for s_value in s_values[1]]]
min_diffs = [min(diffs[0]), min(diffs[1])]
for n, min_diff in enumerate(min_diffs):
if min_diff < 0.4:
initial_params = [1, phs[diffs[n].index(min_diff)]]
try:
fit = curve_fit(HH_fitting, phs, [s[1] for s in s_values[n]], p0=initial_params)
hill = fit[0][0]
pka = fit[0][1]
data_sub.append([s_values[n][0][0], pka, hill])
except:
data_sub.append([s_values[n][0][0], 'NaN', 'NaN'])
else:
data_sub.append([s_values[n][0][0], 'NaN', 'NaN'])
data.append(data_sub)
return data
class lambda_data:
'''
This class will allow you to add lambda file to perform calculations on, append additional lambda
files (add_l_files), compute the unprotonated fraction (compute_all_s_values), and compute the
running average of the unprotonated fraction (compute_all_running_s).
'''
def __init__(self, file_path=''):
'''
This function is used to generate an initial ph object, where the pH objects containes all the
lambda values for that particular pH environment.
file_path : Requies a list of files lambda files arranged from lowest to highest ph environment.
n_itir : Number of Titratable Sites, This includes not only the residue but the tautomers sites.
ititr : Indexes for all titrabtable sites.
n_ires : Number of Titratable Residues, current titratable residues can be Asp, Glu, His, Cys, Lys, and Arg.
ires : The Resid for the titratable site.
ires_raw : Resids including duplicates for lambda and tautomer values.
itauto_raw: ID for all titratable sites for Lambda and X, Cys, Lys, and Arg Type (ID: 0 Lambda); His Type (ID:
1 Lambda & 2 X); Asp & Glu types (ID: 3 Lambda & 4 X).
itauto : ID for all titratable site, but for lambda values only.
info_df : Allows you to print a pandas dataframe containing all the Ititr, Ires, and Itauto values. This
values will also be index according to their position in the pandas data from which is useful
when want to select a specific column of lambda or x values.
lambda_and_x_vals: These are the columns of lambda and x values from the CpHMD lambda file output. You can
select a specific column of data from this list by using the index of the info_df output.
'''
with open(file_path, 'r') as lf:
#### for first line of lambda file. (ititr)
line = lf.readline()
ititr = [int(x) for x in line.split()[2:]]
self.n_ititr = len(ititr) # Number of Titratable Sites
self.ititr = ititr # Index for Titrabtable Sites
#### for second line of lambda file (ires)
line = lf.readline()
ires_raw = [int(x) for x in line.split()[2:]]
ires = sorted(set([int(x) for x in line.split()[2:]]))
self.n_ires = len(ires) # Number of Titratable Residues
self.ires = ires # Unique Resids for all Titratable Residues
self.ires_raw = ires_raw # Resids Including Duplicates for Lambda and Tautomer Values
#### for third line of lambda file (itauto_raw)
line = lf.readline()
itauto_raw = [int(x) for x in line.split()[2:]]
itauto = [int(x) for x in line.split()[2:] if (int(x) == 0 or int(x) == 1 or int(x) == 3)]
self.itauto_raw = itauto_raw # ID for all titratable residues for Lambda and Chi, used for ARG LYS (ID: 0) GLU ASP (ID: 3&4) and HSP (ID: 1&2)
self.itauto = itauto # ID for all titratable residues for Lambda Only
#### for forth line of lambd file (ParK)
line = lf.readline()
park = [float(x) for x in line.split()[2:]]
self.park = park # IDK
#### Start Making a Lambda Files DataFrame here is only the heading information
global info_df
info_df = pd.DataFrame()
info_df['Ititr'] = ititr
info_df['Ires'] = ires_raw
info_df['Itauto'] = itauto_raw
self.info_table = info_df
#### The rest of the lambda file
self.steps = []
self.lambda_and_x_vals = [[] for x in range(self.n_ititr)]
for frame, line in enumerate(lf):
self.steps.append(int(line.split()[0]))
for j in range(self.n_ititr):
self.lambda_and_x_vals[j].append(float(line.split()[j+1]))
def add_l_file(self, file_path=''):
'''
The add_l_file function allows you to append additional lambda files to their corresponding ph objects.
'''
with open(file_path, 'r') as lf:
for line in lf:
if '#' not in line:
for j in range(len(line.split()[1:])):
self.lambda_and_x_vals[j].append(float(line.split()[j+1]))
# Function for finding residues - Work in Progress
def find_residues(self, residue_list):
'''
I am still considering revising this.
'''
resids = []
lambda_columns = []
for n, x in enumerate(self.ires):
for y in residue_list:
if x == y:
resids.append(n)
return resids
def compute_all_s_values(self, cutoff=0.2, output=False, minstep = -1, maxstep = -1):
'''
The compute_all_s_values function allows you to calculate the unprotonated fraction for each residue for each
pH object.
cutoff: This is the lambda and x value cutoff used to denote when the proton is present or absent or
or on one or the other tautomer.
output: Setting this boolean to True will provided the user with a formatted output table of the unprotonated
fraction information for the whole residue and if available each tautomer site as well as the pure and
mixed fractions for that residue.
minstep: This is the step to start calculating the unprotonated fraction. The default value of -1 starts the
calculation at 0.
maxstep: This is the final step used for calculating the unprotonated fraction. The default value of -1 ends
the calculation at the final step.
'''
low_cut = cutoff
up_cut = 1 - cutoff
self.s = []
self.s_x1 = []
self.s_x2 = []
self.mixed = []
if output:
print('ires | itaut | S(unprot) | Pure | Mixed')
for n, i in zip(range(len(self.lambda_and_x_vals)), self.itauto_raw):
# Reset Time
if minstep == -1 or maxstep == -1:
minstep = 0
maxstep = len(self.lambda_and_x_vals[n])
# ASP, GLU, and HIS like - Values are in Agreement - Two Proton Residues
if i == 3 or i == 1:
# Get counts for all state criterion
mixed_count = 0
unprot_count = 0
prot_count = 0
x1_count = 0
x2_count = 0
for count, x in enumerate(zip(self.lambda_and_x_vals[n], self.lambda_and_x_vals[n+1])):
if count >= minstep and count <= maxstep:
if ((x[0] < up_cut and x[0] > low_cut) or (x[1] < up_cut and x[1] > low_cut)):
mixed_count += 1
if (x[0] >= up_cut and (x[1] >= up_cut or x[1] <= low_cut)):
unprot_count += 1
if (x[0] <= low_cut and (x[1] >= up_cut or x[1] <= low_cut)):
prot_count += 1
if i == 3:
if x[0] <= low_cut and x[1] >= up_cut:
x1_count += 1
if x[0] <= low_cut and x[1] <= low_cut:
x2_count += 1
if i == 1:
if x[0] >= up_cut and x[1] >= up_cut:
x1_count += 1
if x[0] >= up_cut and x[1] <= low_cut:
x2_count += 1
# Macro and Micro State Calculations - Try and Else added not tested
mixed = mixed_count / (maxstep - minstep)
pure = 1 - mixed
unprot_frac = unprot_count / ((maxstep - minstep) - mixed_count)
x1_frac = 0
x2_frac = 0
if i == 3:
try:
x1_frac = unprot_count / (unprot_count + x1_count)
x2_frac = unprot_count / (unprot_count + x2_count)
except:
x1_frac = 0
x2_frac = 0
if i == 1:
try:
x1_frac = x1_count / (x1_count + prot_count)
x2_frac = x2_count / (x2_count + prot_count)
except:
x1_frac = 0
x2_frac = 0
# Results for ASP, GLU and HSP like.
self.s.append(unprot_frac)
self.s_x1.append([self.ires_raw[n], x1_frac])
self.s_x2.append([self.ires_raw[n], x2_frac])
self.mixed.append(mixed)
if output:
print('{0:4.0f} {1:0.0f} {2:2.2f} {3:2.2f} {4:2.2f}'.format(self.ires_raw[n], self.itauto_raw[n], unprot_frac, pure, mixed))
print('{0:4.0f} {1:0.0f} {2:2.2f}'.format(self.ires_raw[n], self.itauto_raw[n+1], x1_frac))
print('{0:4.0f} {1:0.0f} {2:2.2f}'.format(self.ires_raw[n], self.itauto_raw[n+1], x2_frac))
# LYS, CYS, and ARG like - Values are in Agreement - One Proton Residues
if i == 0:
# Get counts for all state criterion
mixed_count = 0
unprot_count = 0
for count, x in enumerate(zip(self.lambda_and_x_vals[n])):
if count >= minstep and count <= maxstep:
if (x[0] < up_cut and x[0] > low_cut):
mixed_count += 1
if x[0] >= up_cut:
unprot_count += 1
mixed = mixed_count / (maxstep - minstep)
pure = 1 - mixed
unprot_frac = unprot_count / ((maxstep - minstep) - mixed_count)
# Results
self.s.append(unprot_frac)
self.mixed.append(mixed)
if output:
print('{0:4.0f} {1:0.0f} {2:2.2f} {3:2.2f} {4:2.2f}'.format(self.ires_raw[n], self.itauto_raw[n], unprot_frac, pure, mixed))
def compute_all_running_s(self, cutoff=0.2, minstep = -1, maxstep = -1):
'''
The compute_all_running_s function allows you to calculate the running average of the unprotonated
fraction over the simulation.
cutoff: This is the lambda and x value cutoff used to denote when the proton is present or absent or
or on one or the other tautomer.
minstep: This is the step to start calculating the unprotonated fraction. The default value of -1 starts the
calculation at 0.
maxstep: This is the final step used for calculating the unprotonated fraction. The default value of -1 ends
the calculation at the final step.
'''
low_cut = cutoff
up_cut = 1 - cutoff
self.running_s = [[] for x in self.ires]
residue_count = 0
for n, i in zip(range(len(self.lambda_and_x_vals)), self.itauto_raw):
# Reset Time
if minstep == -1 or maxstep == -1:
minstep = 0
maxstep = len(self.lambda_and_x_vals[n])
# ASP, GLU, and HIS like - Values are in Agreement - Two Proton Residues
if i == 3 or i == 1:
# Get counts for all state criterion
mixed_count = 0
unprot_count = 0
for count, x in enumerate(zip(self.lambda_and_x_vals[n], self.lambda_and_x_vals[n+1])):
if count >= minstep and count <= maxstep:
if ((x[0] < up_cut and x[0] > low_cut) or (x[1] < up_cut and x[1] > low_cut)):
mixed_count += 1
if (x[0] >= up_cut and (x[1] >= up_cut or x[1] <= low_cut)):
unprot_count += 1
# Calculate Running S
running_s_calc = 0
if (count + 1) <= mixed_count:
#running_s_calc = 'NaN'
running_s_calc = 0
else:
running_s_calc = unprot_count / ((count + 1) - mixed_count)
self.running_s[residue_count].append(running_s_calc)
residue_count += 1
# LYS, CYS, and ARG like - Values are in Agreement - One Proton Residues
if i == 0:
# Get counts for all state criterion
mixed_count = 0
unprot_count = 0
for count, x in enumerate(zip(self.lambda_and_x_vals[n])):
if count >= minstep and count <=maxstep:
if (x[0] < up_cut and x[0] > low_cut):
mixed_count += 1
if x[0] >= up_cut:
unprot_count += 1
# Calculate running s
running_s_calc = 0
if (count + 1) <= mixed_count:
#running_s_calc = 'NaN'
running_s_calc = 0
else:
running_s_calc = unprot_count / ((count + 1) - mixed_count)
self.running_s[residue_count].append(running_s_calc)
residue_count += 1
class plotting_lambda_data:
'''
This class will perform all of the standard plotting for CpHMD lambda files allowing you to
plot the running S values and titration curves for individual and multiple residue selections
'''
def plot_running_s(self, phs, resids, titles, xlabel='Steps', steps_to_time_conversion=1, save_fig=False, output='s_conv.png'):
'''
In order to run this function you must have first run the 'compute_all_running_s' function from the
'lambda_data' class.
self: The list of pH objects
phs: list of the pH values that corresponding to the pH objects.
resids: The residue value determined by the 'find_residues' function to denote which running S values
should be plotted.
titles: A list of titles for the plots, this allows you to decide whether you want the title to be
something like 'D4', 'Asp4', or 'Asp 4' ect...
xlabel: The label used for the X-axis. The default is set to 'Steps'
steps_to_time_conversion: A value to multiply the X values by to convert them from steps to time. The
default value is set to 1, which would results in steps.
save_fig: A boolean when set to True allows you to save a png file. The default value is set to False.
output: The output name of png to save. 'save_fig' must be set to True.
'''
colormap = cm.get_cmap('rainbow_r', len(phs))
colors = colormap(np.linspace(0, 1, len(phs)))
plt.close('all')
figcols = 4
figrows = math.ceil(len(resids)/figcols)
fig, axes = plt.subplots(nrows=figrows, ncols=figcols, figsize=(14,10/figcols*figrows))
axes = axes.flatten()
title = titles
for index in range(figcols*figrows):
if index < len(resids):
for n, ph in enumerate(phs):
# Plot Data
axes[index].plot([x*steps_to_time_conversion for x in range(len(self[n].running_s[resids[index]]))],
[y for y in self[n].running_s[resids[index]]],
color=colors[n])
# Plot Details
axes[index].set_title(title[index])
axes[index].set_ylabel('S',fontsize=15)
axes[index].set_xlabel('{}'.format(xlabel), fontsize=15)
axes[index].set_ylim(0, 1)
else:
axes[index].remove()
legend_poses = [[0.35, 0.95], [0.56, 0.95], [0.81, 0.95], [1.06, 0.95]]
if len(resids) == 1:
pose = legend_poses[0]
if len(resids) == 2:
pose = legend_poses[1]
if len(resids) == 3:
pose = legend_poses[2]
if len(resids) >= 4:
pose = legend_poses[3]
fig.legend(labels=phs, # The labels for each line
bbox_to_anchor=pose,
borderaxespad=0.05, # Small spacing around legend box
title="pH Values" # Title for the legend
)
plt.tight_layout()
if save_fig == True:
plt.savefig('{}'.format(output), dpi=300, transparent=False, bbox_inches='tight')
# Note: There seems to be some strange issue where when a residue doesn't titrate the
# x-axis labels don't print. Others online have seen this bug too, but there doesn't
# appear to be a clear fix.
def plot_titration_curves(self, phs, resids, titles, xrange=[0, 14], save_fig=False, output='titration_curves.png'):
'''
In order to run this function you must have first run the 'compute_all_s_values' function from the
'lambda_data' class.
self: The list of pH objects
phs: list of the pH values that corresponding to the pH objects.
resids: The residue value determined by the 'find_residues' function to denote which running S values
should be plotted.
titles: A list of titles for the plots, this allows you to decide whether you want the title to be
something like 'D4', 'Asp4', or 'Asp 4' ect...
xrange: The range to use for the X axis and is in pH units.
save_fig: A boolean when set to True allows you to save a png file. The default value is set to False.
output: The output name of png to save. 'save_fig' must be set to True.
'''
plt.close('all')
# Get the S_Values
s_values = []
for res in resids:
s_res_sub = []
for n, ph in enumerate(phs):
s_res_sub.append(self[n].s[res])
s_values.append(s_res_sub)
# Calculate the pKas
pkas = compute_pkas(phs, self)
# Start Plotting
figcols = 4
figrows = math.ceil(len(resids)/figcols)
fig, axes = plt.subplots(nrows=figrows, ncols=figcols, figsize=(14,10/figcols*figrows))
axes = axes.flatten()
title = titles
for index in range(figcols*figrows):
if index < len(resids):
# Plot Data
axes[index].plot(phs, s_values[index], 'o', color='Black')
if pkas[resids[index]][0] is not 'NaN':
# Plot pKa line
axes[index].axvline(pkas[resids[index]][0], color='red')
# Plot fit
if abs(min(s_values[index])-max(s_values[index])) > 0.1:
local_xs = np.linspace(xrange[0], xrange[1], 100)
axes[index].plot(local_xs, [HH_fitting(pkas[resids[index]][0], -1*pkas[resids[index]][1], x) for x in local_xs], color='Black')
else:
axes[index].hlines(np.mean(s_values[index]), 0, 14, color='black')
# Plot Details
axes[index].set_title(title[index])
axes[index].set_xlim(xrange[0], xrange[1])
axes[index].set_xlabel('pH', fontsize=15)
#axes[index].set_xticks([x for x in range(xrange[0], xrange[1]+1, 2)])
axes[index].set_ylim(0, 1)
axes[index].set_ylabel('Unprot. Fraction', fontsize=12)
else:
axes[index].remove()
plt.tight_layout()
if save_fig == True:
plt.savefig('{}'.format(output), dpi=300, transparent=False)
##########################################################
# #
# Log File Processing #
# #
##########################################################
# Used for CHARMM CpHMD, CHARMM version >C38 #
# and the Amber Async Replica Exchange #
##########################################################
class log_analysis_charmm:
'''
This class will allow you to process the CHARMM (version >c38) log files and calculate the exchange
frequencies between pH replicas and plot each replica walk.
'''
def __init__(self, files_path=''):
'''
file_path: This needs to be a 2D list of log files arranged by simulation stages/runs then
by replica with the replicas in ascending order.
exchange_frq: can be used to print the exchange frequencies between the pH replicas.
'''
# Step 1: Using the multidimensional array store the data.
storage = []
# Gather File Info -----
cpus_used = [] # This is the list of the #cpus used.
step_size = 0
# ----------------------
for rep in range(len(files_path[0])):
storage_sub = []
for stage in range(len(files_path)):
with open(files_path[stage][rep], 'r') as f:
flag = 0
for line in f:
if 'PH-REX>' in line:
flag += 1
statement = str(line[9:16]).strip()
neighbor = int(line[36:40])
step = int(line[44:])
if rep % len(files_path[0]) == 0 and flag == 1: # store information
step_size = step
cpus = neighbor
cpus_used.append(cpus) # This keeps a memory of the number of #cpu changes
if rep == 0 or rep == len(files_path[0])-1:
storage_sub.append([step, statement, neighbor/cpus_used[stage]])
storage_sub.append([step+step_size, 'N/A', 'N/A'])
else:
storage_sub.append([step, statement, neighbor/cpus_used[stage]])
storage.append(storage_sub)
# Step 2: Construct the RepWalk and Calculate the Exchange Rates
self.repwalk = []
repwalk_list = [rep for rep in range(len(storage))]
self.repwalk.append([val for val in repwalk_list])
exchange_frq_data = [0 for x in range(len(storage)-1)]
for step in range(len(storage[1])):
if (step+1) % 2 != 0: # Down Swap for Odd Steps, if odd
for i in [x for x in range(0, len(storage))]:
y = storage[i][step]
if i > y[2]:
if'ACCEPT' in y[1]:
pose1 = 0
pose2 = 0
for pose, ph in enumerate(repwalk_list):
if ph == i:
pose1 = pose
if ph == y[2]:
pose2 = pose
exchange_frq_data[int(y[2])] += 1
repwalk_list[pose1], repwalk_list[pose2] = repwalk_list[pose2], repwalk_list[pose1]
else: # Up Swap for Even Steps, else even
for i in [x for x in range(1, len(storage)-1)]:
y = storage[i][step]
if i < y[2]:
if 'ACCEPT' in y[1]:
pose1 = 0
pose2 = 0
for pose, ph in enumerate(repwalk_list): # locate the position of pH for swap
if ph == i:
pose1 = pose
exchange_frq_data[i] += 1
if ph == y[2]:
pose2 = pose
repwalk_list[pose1], repwalk_list[pose2] = repwalk_list[pose2], repwalk_list[pose1]
self.repwalk.append([val for val in repwalk_list])
self.exchange_frq = []
for n, x in enumerate(exchange_frq_data):
self.exchange_frq.append([n, n+1, x*2/len(storage[0])])
def plot_replica_walk(self, xlabel='Step', steps_to_time_conversion=1, save_fig=False, output='repwalk.png'):
'''
Once the replica exchange files have been processed this function can be used to plot the replica
walks.
xlabel: The label used for the X-axis. The default is set to 'Steps'
steps_to_time_conversion: A value to multiply the X values by to convert them from steps to time. The
default value is set to 1, which would results in steps.
save_fig: A boolean when set to True allows you to save a png file. The default value is set to False.
output: The output name of png to save, default name is 'repwalk.png'. 'save_fig' must be set to True.
'''
plt.close('all')
figcols = 4
figrows = math.ceil(len(self.repwalk[0])/figcols)
fig, axes = plt.subplots(nrows=figrows, ncols=figcols, figsize=(14,10/figcols*figrows))
axes = axes.flatten()
for index in range(figcols*figrows):
if index < len(self.repwalk[0]):
# Plot Data
axes[index].plot([x*steps_to_time_conversion for x in range(len(self.repwalk))], [y[index] for y in self.repwalk], '-', color='Black')
# Plot Details
axes[index].set_title('Rep: {}'.format(index))
axes[index].set_ylabel('pH Replica', fontsize=15)
axes[index].set_ylim(0, len(self.repwalk[0])-1)
axes[index].set_xlabel('{}'.format(xlabel),fontsize=15)
else:
axes[index].remove()
plt.tight_layout()
if save_fig == True:
plt.savefig('{}'.format(output), dpi=300, transparent=False)
class log_analysis_amber:
'''
This class will allow you to process the AMBER Async CpHMD log files and calculate the exchange
frequencies between pH replicas and plot each replica walk.
'''
def __init__(self, files_path=''):
'''
file_path: This needs to be a 1D list of the log files for each stage of the Async CpHMD replica
exchange.
exchange_frq: can be used to print the exchange frequencies between the pH replicas.
'''
self.rwalk_data = []
all_exchange_flags = []
prev_line = []
exch_flag = 0 # this flag is for testing
for stage, file in enumerate(files_path):
with open(file, 'r') as f:
for count, line in enumerate(f):
split_line = re.split('\s+', line)
split_line = split_line[2:-1]
# Make the decoder
key_line = []
if stage > 0 and count == 0:
key_line = prev_line
# This is the heart of the matter by keeping track of the positions
# of the replicas.
decode = {split_line[x]:key_line[x] for x in range(len(split_line))}
#print('HERE {}'.format(key_line))
#print('HERE {}'.format(split_line))
save = prev_line # save the previous line so you can examine if a swap occured.
# Perform the Decoding - Decoding occurs for everything after the first stage.
the_line = [] # This is what is actually used for the first stage it's the
# same as split_line but after that it is decoded.
if stage == 0:
prev_line = split_line
the_line = split_line
else:
for entry in split_line:
the_line.append(decode[entry])
#print(f'cur: {count} {the_line}')
#print(f'sav: {count} {save}')
# Calculate the Exchange Frequency
exch_flag += 1
if exch_flag > 1: # The Flag here makes sure the Exch. Freq. is started on second step.
exchange_flags = []
for incr in range(len(the_line)-1):
a = the_line[incr:incr+2]
b = save[incr:incr+2]
if a == b[::-1]:
exchange_flags.append(1)
else:
exchange_flags.append(0)
all_exchange_flags.append(exchange_flags)
#print(f'Exc: {count} {exchange_flags}')
# For decoder to remember the previously decoded lines
if stage > 0:
prev_line = the_line
# The Following finds where the replica is and records the walk.
position_collection = []
for n in range(len(split_line)):
for position, val in enumerate(the_line):
if n == int(val):
position_collection.append(position)
self.rwalk_data.append(position_collection)
final_exchange_count = [0 for x in range(len(all_exchange_flags[0]))]
for exchange_line in all_exchange_flags:
for n_exchange, exchange_flag in enumerate(exchange_line):
final_exchange_count[n_exchange] += exchange_flag
#print(f'Final Exchange Count: {final_exchange_count}')
#print(f'All Exchange Flags: {len(all_exchange_flags)}')
self.exchange_frq = []
for rep, val in enumerate(final_exchange_count):
#print(f'{val} {len(all_exchange_flags)} {val/len(all_exchange_flags)}')
self.exchange_frq.append([rep, rep+1, val/len(all_exchange_flags)])
def plot_replica_walk(self, xlabel='Step', steps_to_time_conversion=1, save_fig=False, output='repwalk.png'):
'''
Once the replica exchange files have been processed this function can be used to plot the replica
walks.
xlabel: The label used for the X-axis. The default is set to 'Steps'
steps_to_time_conversion: A value to multiply the X values by to convert them from steps to time. The
default value is set to 1, which would results in steps.
save_fig: A boolean when set to True allows you to save a png file. The default value is set to False.
output: The output name of png to save. 'save_fig' must be set to True.
'''
plt.close('all')
figcols = 4
figrows = math.ceil(len(self.rwalk_data[0])/figcols)
fig, axes = plt.subplots(nrows=figrows, ncols=figcols, figsize=(14,10/figcols*figrows))
axes = axes.flatten()
for index in range(figcols*figrows):
if index < len(self.rwalk_data[0]):
# Plot Data
axes[index].plot([x*steps_to_time_conversion for x in range(len(self.rwalk_data))], [y[index] for y in self.rwalk_data], '-', color='Black')
# Plot Details
axes[index].set_title('Rep: {}'.format(index))
axes[index].set_ylabel('pH Replica', fontsize=15)
axes[index].set_ylim(0, len(self.rwalk_data[0])-1)
axes[index].set_xlabel('{}'.format(xlabel),fontsize=15)
else:
axes[index].remove()
plt.tight_layout()
if save_fig == True:
plt.savefig('{}'.format(output), dpi=300, transparent=False)