#!/usr/bin/env python
################################################################################################
# The program is the main 'processing unit' of the project, in the sense                      #
# that it calls all the other programs and is the place where the global variables            #
# are defined. The program can be divided into two major parts: 1. the file parser section    #
# and 2. the neural network section.                                                          #
# In the second section of the file, i.e. in the neural network section, an FFNN (Feed        #
# Forward Neural Network) class is instantiated, which is defined in 'network_pytorch.py'.    #
#                                                                                             #
# The author of this program is:                                                              #
# Swapnil Wagle                                                                               #
# Max Planck Institute of Colloids and Interfaces, Potsdam, Germany                           #
# E-mail id: [email protected]                                                            #
################################################################################################
import os
import numpy
import torch
from torch.autograd import Variable
import torch.optim as optim
import network_pytorch
from file_parser import File_Parser
# The path of the directory where all the data is located
path = '/Users/swapnil/Documents/FF_for_swapnil'
# Initialization of the variables
files = []
atomtypes = []
optypes = []
# Reading the atomtypes.txt file, which provides the index list for the output vectors
f = open('./atomtypes.txt', "r")
for x in f.readlines():
    data = x.split()
    atomtypes.append(data[0])
f.close()
# Reading the optypes.txt files, which creates the the index list for the input vectors
i=0
f = open('./optypes.txt', "r")
for x in f.readlines():
data = x.split()
optypes.append(data[0])
i = i+1
f.close()
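# (Assumed input format: each line of atomtypes.txt / optypes.txt starts with a
# whitespace-separated type name, which is the only field used here, e.g.
#     C1  <optional further columns, ignored>
# Any additional columns on a line are ignored by the parsing above.)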
# The input and output arrays are declared as 2-dimensional numpy arrays,
# where the second dimension (the column index) is the length of the input/output index,
# i.e. the optypes and atomtypes lists
i_vectors = numpy.empty([0, len(optypes)], dtype=numpy.double)
o_vectors = numpy.empty([0, len(atomtypes) * 2], dtype=numpy.double)
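# For example, with 10 op types and 8 atom types (hypothetical counts), i_vectors would
# grow to shape (N, 10) and o_vectors to shape (N, 16) after N files are parsed.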
# This is the first part of the processing unit, i.e. the file parser. It is an abstract part
# of the parsing process, in which the files are listed. Each path is then sent to another
# program via the class 'File_Parser', where it is transformed into numpy arrays based on the
# indexing of the optypes and atomtypes lists. The numpy arrays (i_vectors and o_vectors) are
# utilized further by the neural network, which is introduced in the second part of this program.
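# For orientation, the File_Parser interface as used below (inferred from its call sites in
# this file; the actual class is defined in file_parser.py):
#
#     instance = File_Parser(txt_filepath, itp_filepath, atomtypes, optypes)
#     (i_vector, o_vector) = instance.file_parser(txt_filepath, itp_filepath, atomtypes, optypes)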
i = 0
for r, d, f in os.walk(path):
    for file in f:
        if file.endswith(".txt"):
            files.append(os.path.join(r, file))
for fff in sorted(files):
    if os.path.exists(fff) and os.path.getsize(fff) == 0:
        # print("Warning type 1: File exists but is empty", fff)
        continue
    elif not os.path.exists(fff):
        # print("Warning type 2: txt file does not exist", fff)
        continue
    else:
        txt_filepath = fff
        itp_filename = "lipid_" + os.path.splitext(fff)[0].split('_')[-2] + "_" + os.path.splitext(fff)[0].split('_')[-1] + ".itp"
        itp_filepath = os.path.join(os.path.dirname(fff), itp_filename)
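        # (Illustration with a hypothetical file name: for '/data/run_POPC_128.txt' the two
        # trailing underscore-separated fields are 'POPC' and '128', so the matching topology
        # file looked up here is '/data/lipid_POPC_128.itp'.)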
        if os.path.exists(itp_filepath) and os.path.getsize(itp_filepath) == 0:
            # print("Warning type 1: File exists but is empty", itp_filepath)
            continue
        elif not os.path.exists(itp_filepath):
            # print("Warning type 2: itp file does not exist", itp_filepath)
            continue
        else:
            instance = File_Parser(txt_filepath, itp_filepath, atomtypes, optypes)
            # file_parser is expected to return one input vector (length len(optypes)) and
            # one output vector (length len(atomtypes) * 2) per file
            (i_vector, o_vector) = instance.file_parser(txt_filepath, itp_filepath, atomtypes, optypes)
            i_vectors = numpy.append(i_vectors, [i_vector], axis=0)
            o_vectors = numpy.append(o_vectors, [o_vector], axis=0)
            i = i + 1
nof = i # Total number of files, i.e. number of training-data files
i_vectors = torch.from_numpy(i_vectors)
o_vectors = torch.from_numpy(o_vectors)
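# Optional sanity check (a suggested addition, not part of the original workflow): after
# stacking, the training tensors should have shapes (nof, len(optypes)) and
# (nof, len(atomtypes) * 2). Uncomment to verify:
# assert i_vectors.shape == (nof, len(optypes))
# assert o_vectors.shape == (nof, len(atomtypes) * 2)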
# This is the second part of the processing unit, i.e. the neural network section.
# The i_vectors and o_vectors obtained from the file_parser section are combined together
# to generate the training data set for the neural network. i_vectors and o_vectors are two-
# dimensional torch tensors, which contain the 'stacks' of the input and output tensors for
# training the neural network.
# This section deals with instantiating the neural network class (named Network_pytorch) and
# calling its 'forward' method (via ffnn(inputs)) for training the network.
# The constructor arguments below are the layer sizes, with the first and last layers matching
# the input and output vector lengths, respectively; lr below is the learning rate.
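# For reference, a minimal sketch of what such a class could look like (an assumption; the
# actual definition lives in network_pytorch.py and may differ):
#
#     class Network_pytorch(torch.nn.Module):
#         def __init__(self, n_input, n_hidden, n_output):
#             super(Network_pytorch, self).__init__()
#             self.hidden = torch.nn.Linear(n_input, n_hidden)
#             self.output = torch.nn.Linear(n_hidden, n_output)
#
#         def forward(self, x):
#             return self.output(torch.sigmoid(self.hidden(x)))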
ffnn = network_pytorch.Network_pytorch(len(optypes), 200, len(atomtypes) * 2)  # Instantiating the neural network class
# The arguments are the number of neurons for each layer,
# i.e. the input, hidden and output layers
loss = torch.nn.MSELoss()  # MSE (Mean Squared Error) loss function
optimizer = optim.SGD(ffnn.parameters(), lr=0.01)  # Gradient descent algorithm for parameter optimization
# lr (learning rate) is 0.01
iterations = 0
running_loss = 0  # Accumulator for the running error/loss of the neural network predictions
for i_vector, o_vector in zip(i_vectors, o_vectors):
    x = i_vector.reshape(-1, len(optypes)).float()
    inputs = Variable(x)  # Defining the input variable for the neural network class
    y = o_vector.reshape(-1, len(atomtypes) * 2).float()
    outputs = Variable(y)  # Defining the target variable for the neural network class
    optimizer.zero_grad()  # Zeroing the parameter gradients
    predictions = ffnn(inputs)
    error = loss(predictions, outputs)
    print('[%5d] loss: %.3f' % (iterations + 1, error.item()))
    error.backward()  # Backward propagation step
    optimizer.step()
    running_loss += error.item()
    iterations += 1
    # Uncomment the following lines if you want the code to print the cumulative
    # error (or loss) of the training process every 20 iterations:
    # if iterations % 20 == 19:
    #     print('[%5d] loss: %.3f' % (iterations + 1, running_loss / 20))
    #     running_loss = 0.0
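# A possible next step after training (a sketch, not part of the original script): run the
# trained network on a single input vector without tracking gradients.
# with torch.no_grad():
#     sample = i_vectors[0].reshape(-1, len(optypes)).float()
#     print(ffnn(sample))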