-
Notifications
You must be signed in to change notification settings - Fork 0
/
data_formatting.py
89 lines (80 loc) · 2.71 KB
/
data_formatting.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# Section marker line -> internal section id used while parsing.
_SECTION_IDS = {
    '@PartitionA': 1,
    '@PartitionB': 2,
    '@PreferenceListsA': 3,
    '@PreferenceListsB': 4,
}


def _trim(line):
    """Remove every space from *line* and drop its final character."""
    # The final character is dropped unconditionally; presumably it is a
    # one-character terminator such as ';' -- confirm against the raw data.
    return line.replace(' ', '')[:-1]


def _parse_preferences(line):
    """Split an ``owner:entry,entry,...`` line into ``(owner_index, entries)``.

    The first character of the owner and of every entry is discarded.  The
    remainder of the owner is converted to ``int``; the remainder of each
    entry is kept as a string.  Empty entries (e.g. an empty preference
    list) are skipped.
    """
    fields = _trim(line).split(':')
    owner_index = int(fields[0][1:])
    entries = [token[1:] for token in fields[1].split(',') if token != '']
    return owner_index, entries


def _format_row(entries):
    """Return ``"<count> <e1> <e2> ... \\n"`` (note the trailing space)."""
    return ' '.join([str(len(entries))] + entries) + ' \n'


def formatData(path, outpath):
    """Convert a sectioned preference-list file into a flat numeric layout.

    The input is read line by line.  Blank lines and ``@End`` lines are
    ignored.  The marker lines ``@PartitionA``, ``@PartitionB``,
    ``@PreferenceListsA`` and ``@PreferenceListsB`` select how the following
    lines are interpreted:

    * partition A / B: the number of comma-separated names on the line is
      taken as the resident / hospital count;
    * preference lists A / B: each line is ``owner : entry, entry, ...``
      followed by one terminating character, and the entries are appended to
      that resident's / hospital's list.

    The output file contains the resident count on the first line, then one
    line per resident (1..count) and one line per hospital (1..count), each
    of the form ``<number of entries> <entry> <entry> ... `` with a trailing
    space.  The hospital count itself is not written.

    Args:
        path: Path of the raw input file.
        outpath: Path of the formatted file to create or overwrite.

    Raises:
        IndexError: If a preference line has no ``:`` separator or refers to
            an owner index beyond the declared partition size.
        ValueError: If an owner identifier has no integer after its first
            character.
    """
    section = 0
    tot_residents = 0
    tot_hosp = 0
    # Index 0 is a placeholder so that 1-based ids index the lists directly.
    residents = [[]]
    hospitals = [[]]

    with open(path, "r") as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line or line == '@End':
                continue
            if line in _SECTION_IDS:
                section = _SECTION_IDS[line]
                continue

            if section == 1:
                tot_residents = len(_trim(line).split(','))
                residents.extend([] for _ in range(tot_residents))
            elif section == 2:
                tot_hosp = len(_trim(line).split(','))
                hospitals.extend([] for _ in range(tot_hosp))
            elif section == 3:
                r_ind, prefs = _parse_preferences(line)
                residents[r_ind].extend(prefs)
            elif section == 4:
                h_ind, prefs = _parse_preferences(line)
                hospitals[h_ind].extend(prefs)

    # The output is opened only after parsing succeeded, in ordinary buffered
    # text mode (unbuffered text I/O is rejected by Python 3).
    with open(outpath, "w") as f_w:
        f_w.write(str(tot_residents) + '\n')
        for prefs in residents[1:tot_residents + 1]:
            f_w.write(_format_row(prefs))
        # Each hospital row reports the length of the hospital's own list.
        for prefs in hospitals[1:tot_hosp + 1]:
            f_w.write(_format_row(prefs))
# Batch driver: convert instances 1..5 of every (size, folder) combination
# from the raw_data/ tree into the matching location under data/.
folders = ['master', 'shuffle', 'random']
# Other instance sizes that can be substituted here:
# '10', '50', '100', '200', '500'
sizes = ['1000']

for folder in folders:
    for size in sizes:
        # Input and output trees share the "<size>_<size>/<folder>/" layout.
        pair = size + '_' + size
        subdir = pair + '/' + folder + '/'
        for i in range(1, 6):
            name = pair + '_' + str(i) + '.txt'
            fin = 'raw_data/complete/' + subdir + name
            fout = 'data/complete/' + subdir + name
            print(fin)
            formatData(fin, fout)