-
Notifications
You must be signed in to change notification settings - Fork 0
/
nivasharma_dna_3_DNA.java
413 lines (343 loc) · 15.8 KB
/
nivasharma_dna_3_DNA.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
import java.util.ArrayList;
/**
* DNA is the carrier of genetic information in living things and is used in
* quite a few ways today. What can we find out by looking at DNA?
* To name a few things, we can find out to whom it belongs and identify
* potential family relationships.
*
* DNA is made up of a sequence of molecules. Some portions are the same or
* similar amongst all humans, and other portions have higher diversity.
* The areas we can look at to identify individuals are called STRs (Short
* Tandem Repeats); these are places where short DNA segments are repeated
* back to back, e.g. AGATAGATAGATACGTACGT. Here you see one STR, AGAT, repeated
* three times before being stopped by the STR ACGT repeated twice (this is
* just one case out of many representations).
*
* Using multiple STRs we can narrow our search down to be more specific to any
* kind of information we are interested in.
*
* @author Seth Kelley
* @author Aastha Gandhi
*/
public class DNA {

    // Both fields are populated by the constructor, through
    // createDatabaseOfProfiles() and readSTRsOfInterest().
    private Profile[] database;      // Holds all of the profile objects.
    private String[] STRsOfInterest; // Holds the STRs (as Strings) used to process every profile's DNA.

    /**
     * Initialize the array of Profile objects and the STRs of interest.
     *
     * @param databaseFile The file containing all of the names and their DNA
     *                     sequences
     * @param STRsFile     The file with all the STRs of interest
     */
    public DNA ( String databaseFile, String STRsFile ) {
        /*** DO NOT EDIT ***/
        createDatabaseOfProfiles(databaseFile); // Initializes the database array
        readSTRsOfInterest(STRsFile); // Initializes the STRsOfInterest array
    }

    /**
     * Create the database array of profiles and insert all the profiles from file.
     *
     * Each profile includes a person's name and two DNA sequences.
     *
     * 1. Reads the number of profiles from the input file and creates the database
     *    array to hold that many profiles.
     * 2. For each person in the file:
     *    a. Creates a Profile object from the name and the two sequences, with the
     *       profile's S1_STRs and S2_STRs set to null.
     *    b. Stores the profile in the next position of the database array.
     *
     * Input file format:
     *   - 1 line containing an integer with the number of profiles in the file, p.
     *   - for each of the p profiles:
     *       - 1 line containing the person's name
     *       - 1 line containing the first sequence
     *       - 1 line containing the second sequence
     *
     * @param filename The input file containing the persons' names and DNA sequences
     */
    public void createDatabaseOfProfiles ( String filename ) {
        StdIn.setFile(filename); // DO NOT remove this line, keep it as the first line in the method.

        int profileCount = Integer.parseInt(StdIn.readLine());
        database = new Profile[profileCount];

        for (int slot = 0; slot < profileCount; slot++) {
            String name = StdIn.readLine();
            String sequence1 = StdIn.readLine();
            String sequence2 = StdIn.readLine();
            // The STR arrays stay null here; createProfileSTRs() sets them later.
            database[slot] = new Profile(name, null, null, sequence1, sequence2);
        }
    }

    /**
     * Create the STRsOfInterest array and insert all STRs from file.
     *
     * Input file format:
     *   - 1 line containing an integer with the number of STRs in the file, s.
     *   - s lines, one STR per line.
     *
     * @param filename The input file containing all the STRs
     */
    public void readSTRsOfInterest ( String filename ) {
        StdIn.setFile(filename); // DO NOT remove this line, keep as the first line in the method.

        int strCount = Integer.parseInt(StdIn.readLine());
        STRsOfInterest = new String[strCount];

        for (int slot = 0; slot < strCount; slot++) {
            STRsOfInterest[slot] = StdIn.readLine();
        }
    }

    /**
     * Creates the Profile for the unknown DNA sequence from filename.
     *
     * The profile is named "Unknown", its S1_STRs and S2_STRs are null (to be
     * calculated later), sequence1 is the first line of the file and sequence2
     * is the second line.
     *
     * File format (only two lines):
     *   - first line containing a DNA sequence
     *   - second line containing a DNA sequence
     *
     * @param filename The input file for the unknown DNA sequence
     * @return Returns a Profile object for the unknown DNA sequence
     */
    public Profile createUnknownProfile ( String filename ) {
        StdIn.setFile(filename); // DO NOT remove this line, keep as the first line in the method.

        // Read into named locals so the line order (first, then second) is explicit.
        String sequence1 = StdIn.readLine();
        String sequence2 = StdIn.readLine();
        return new Profile("Unknown", null, null, sequence1, sequence2);
    }

    /**
     * Given a DNA sequence and a single STR, creates an STR object holding the
     * STR name and the longest number of back-to-back repeats of that STR
     * within the DNA sequence.
     *
     * Every possible starting index is considered, so the longest run is found
     * even when the STR can overlap a shifted copy of itself (for example "ABA"
     * in "ABABAABA" has a run of 2 starting at index 2).
     *
     * @param sequence The DNA sequence (String) to be looked at
     * @param STR      The STR (String) to look for in the DNA sequence
     * @return The STR object with the name and longest number of repeats; the
     *         count is 0 when the STR is empty, longer than the sequence, or
     *         never occurs
     */
    public STR findSTRInSequence ( String sequence, String STR ) {
        int strLength = STR.length();
        int sequenceLength = sequence.length();

        // An empty STR has no meaningful repeat count (and would otherwise never
        // advance the scan); an STR longer than the sequence cannot occur at all.
        if (strLength == 0 || sequenceLength < strLength) {
            return new STR(STR, 0);
        }

        // runStartingAt[i] = number of consecutive copies of STR beginning at index i.
        // Filled right-to-left so that the run beginning at i + strLength is already known.
        // The extra trailing slot keeps the i + strLength lookup in bounds.
        int[] runStartingAt = new int[sequenceLength + 1];
        int longestRun = 0;

        for (int start = sequenceLength - strLength; start >= 0; start--) {
            if (sequence.startsWith(STR, start)) {
                runStartingAt[start] = 1 + runStartingAt[start + strLength];
                longestRun = Math.max(longestRun, runStartingAt[start]);
            }
        }
        return new STR(STR, longestRun);
    }

    /**
     * Compute the STRs (S1_STRs and S2_STRs) for the profile.
     *
     * For every entry of allSTRs, findSTRInSequence() is run against the
     * profile's sequence1 and sequence2; the results are stored on the profile
     * in the same order as allSTRs.
     *
     * @param profile The profile the method will compute the STR arrays for
     * @param allSTRs The list of STRs to be looked for in the profile's DNA sequences
     */
    public void createProfileSTRs ( Profile profile, String[] allSTRs ) {
        int count = allSTRs.length;
        STR[] fromSequence1 = new STR[count];
        STR[] fromSequence2 = new STR[count];

        for (int k = 0; k < count; k++) {
            String target = allSTRs[k];
            fromSequence1[k] = findSTRInSequence(profile.getSequence1(), target);
            fromSequence2[k] = findSTRInSequence(profile.getSequence2(), target);
        }

        profile.setS1_STRs(fromSequence1);
        profile.setS2_STRs(fromSequence2);
    }

    /**
     * Call createProfileSTRs() for each profile in the database, using the
     * STRs of interest.
     */
    public void createDatabaseSTRs() {
        for (Profile profile : database) {
            createProfileSTRs(profile, STRsOfInterest);
        }
    }

    /**
     * Compares two STR arrays to determine if they are identical.
     *
     * Two STR arrays are identical if, for every index i of s1, s1[i].equals(s2[i]).
     *
     * Assumes s1 and s2 are of the same length; only the indices of s1 are visited.
     *
     * @param s1 STR array from one profile.
     * @param s2 STR array from another profile.
     * @return Returns true if the objects in the arrays are a complete match, otherwise false
     */
    public boolean identicalSTRs ( STR[] s1, STR[] s2 ) {
        for (int idx = 0; idx < s1.length; idx++) {
            boolean same = s1[idx].equals(s2[idx]);
            if (!same) {
                return false; // first mismatch settles it
            }
        }
        return true;
    }

    /**
     * Attempts to find profiles in the database whose sequence1 STRs match the
     * unknown profile's array of STRs found in sequence1.
     *
     * @param unknownProfileS1_STRs The sequence1 STRs of the person the method is searching for.
     * @return Returns an ArrayList with all matching profile(s), in database
     *         order. It will return an empty ArrayList if no match is found.
     */
    public ArrayList<Profile> findMatchingProfiles ( STR[] unknownProfileS1_STRs ) {
        ArrayList<Profile> matches = new ArrayList<>();
        for (Profile candidate : database) {
            if (identicalSTRs(candidate.getS1_STRs(), unknownProfileS1_STRs)) {
                matches.add(candidate);
            }
        }
        return matches;
    }

    /**
     * A punnet square is a simple way of discovering all of the potential combinations of
     * genotypes that can occur in children, given the genotypes of their parents.
     *
     * This method acts as a punnet square checker to check if all the STRs in
     * the array match between the parents and offspring for any one square in the
     * punnet square.
     *
     * This method is used in the findPossibleParents method.
     *
     * @param firstParent               The STRs of one parent
     * @param inheritedFromFirstParent  The one pairing of STRs for the offspring
     * @param secondParent              The STRs of the other parent
     * @param inheritedFromSecondParent The second pairing of STRs for the offspring
     * @return Returns true if:
     *         - the STRs from the first parent match the offspring STRs inherited from the first parent
     *         AND
     *         - the STRs from the second parent match the offspring STRs inherited from the second parent.
     */
    public boolean punnetSquare( STR[] firstParent, STR[] inheritedFromFirstParent,
                                 STR[] secondParent, STR[] inheritedFromSecondParent ) {
        /* DO NOT EDIT */
        for ( int i = 0; i < firstParent.length; i++ ) {
            if ( !(firstParent[i].equals(inheritedFromFirstParent[i]) && secondParent[i].equals(inheritedFromSecondParent[i])) ) {
                return false; // Returns false if there is a discrepancy
            }
        }
        return true;
    }

    /**
     * Looks at the STR arrays of any given person and tries to find the
     * potential relatives (parents) of that person based on those arrays.
     *
     * Step 1: every database profile with an STR array (S1 or S2) identical to
     * the offspring's S2_STRs becomes a "parent 1" candidate, and every profile
     * with an array identical to the offspring's S1_STRs becomes a "parent 2"
     * candidate.
     * Step 2: every (parent 1, parent 2) combination of two different profiles
     * that satisfies at least one punnet square is appended to the result as
     * two consecutive entries: parent 1 first, then parent 2.
     *
     * NOTE(review): a profile whose S1 and S2 arrays both match the same
     * offspring array is added to that candidate list twice, and two profiles
     * that each appear in both candidate lists are reported in both orders.
     * This is kept as-is so the returned list is unchanged.
     *
     * @param S1_STRs The first list of STRs contained by the offspring that one
     *                parent passed down
     * @param S2_STRs The second list of STRs contained by the offspring that the
     *                other parent passed down
     * @return Returns the list of profiles that are related, as consecutive pairs
     */
    public ArrayList<Profile> findPossibleParents ( STR[] S1_STRs, STR[] S2_STRs ) {
        ArrayList<Profile> possibleParent1 = new ArrayList<>();
        ArrayList<Profile> possibleParent2 = new ArrayList<>();

        // Step 1: collect candidates. The four checks are independent on purpose
        // (see the review note above) and run in database order.
        for (Profile candidate : database) {
            if (identicalSTRs(candidate.getS2_STRs(), S1_STRs)) {
                possibleParent2.add(candidate);
            }
            if (identicalSTRs(candidate.getS1_STRs(), S2_STRs)) {
                possibleParent1.add(candidate);
            }
            if (identicalSTRs(candidate.getS1_STRs(), S1_STRs)) {
                possibleParent2.add(candidate);
            }
            if (identicalSTRs(candidate.getS2_STRs(), S2_STRs)) {
                possibleParent1.add(candidate);
            }
        }

        // Step 2: pair up candidates that are different profiles and pass a punnet square.
        ArrayList<Profile> parentList = new ArrayList<>();
        for (Profile parent1 : possibleParent1) {
            for (Profile parent2 : possibleParent2) {
                if (!parent1.equals(parent2)
                        && satisfiesAnyPunnetSquare(parent2, parent1, S1_STRs, S2_STRs)) {
                    parentList.add(parent1);
                    parentList.add(parent2);
                }
            }
        }
        return parentList;
    }

    /**
     * Checks whether any punnet square works for the two given parents: each
     * parent may contribute either of its STR arrays, and the offspring's two
     * arrays may be assigned to the parents in either order (2 x 2 x 2 = 8 squares).
     *
     * @param first   The profile passed to punnetSquare() as the first parent
     * @param second  The profile passed to punnetSquare() as the second parent
     * @param S1_STRs The offspring's first STR array
     * @param S2_STRs The offspring's second STR array
     * @return true if at least one of the eight squares passes punnetSquare()
     */
    private boolean satisfiesAnyPunnetSquare ( Profile first, Profile second,
                                               STR[] S1_STRs, STR[] S2_STRs ) {
        STR[][] firstChoices = { first.getS1_STRs(), first.getS2_STRs() };
        STR[][] secondChoices = { second.getS1_STRs(), second.getS2_STRs() };

        for (STR[] fromFirst : firstChoices) {
            for (STR[] fromSecond : secondChoices) {
                if (punnetSquare(fromFirst, S1_STRs, fromSecond, S2_STRs)
                        || punnetSquare(fromFirst, S2_STRs, fromSecond, S1_STRs)) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Getter for the database/profiles instance variable
     *
     * @return The database instance variable
     */
    public Profile[] getDatabase() {
        /* DO NOT EDIT */
        return database;
    }

    /**
     * Getter for the STRsOfInterest instance variable
     *
     * @return The STRsOfInterest instance variable
     */
    public String[] getSTRsOfInterest() {
        /* DO NOT EDIT */
        return STRsOfInterest;
    }
}