Skip to content

Commit

Permalink
Merge pull request #92 from jasasonc/master
Browse files Browse the repository at this point in the history
Fixing 58b files with formatting issues
  • Loading branch information
jankoslavic authored Nov 6, 2024
2 parents ff59540 + 4a1c7b2 commit eaf4f14
Show file tree
Hide file tree
Showing 5 changed files with 96 additions and 15 deletions.
Binary file added data/MPSTD#Set001_2024_10_08_10_27_07.uff
Binary file not shown.
Binary file not shown.
88 changes: 77 additions & 11 deletions pyuff/datasets/dataset_58.py
Original file line number Diff line number Diff line change
Expand Up @@ -1068,17 +1068,20 @@ def _extract58(block_data):
# Body
# split_data = ''.join(split_data[13:])
if binary:
split_data = b''.join(block_data.splitlines(True)[13:])
if dset['byte_ordering'] == 1:
bo = '<'
else:
bo = '>'
if (dset['ord_data_type'] == 2) or (dset['ord_data_type'] == 5):
# single precision - 4 bytes
values = np.asarray(struct.unpack('%c%sf' % (bo, int(len(split_data) / 4)), split_data), 'd')
else:
# double precision - 8 bytes
values = np.asarray(struct.unpack('%c%sd' % (bo, int(len(split_data) / 8)), split_data), 'd')
try:
split_data = b''.join(block_data.splitlines(True)[13:])
if dset['byte_ordering'] == 1:
bo = '<'
else:
bo = '>'
if (dset['ord_data_type'] == 2) or (dset['ord_data_type'] == 5):
# single precision - 4 bytes
values = np.asarray(struct.unpack('%c%sf' % (bo, int(len(split_data) / 4)), split_data), 'd')
else:
# double precision - 8 bytes
values = np.asarray(struct.unpack('%c%sd' % (bo, int(len(split_data) / 8)), split_data), 'd')
except:
raise Exception('Potentially wrong data format (common with binary files from some commercial softwares). Try using pyuff.fix_58b() to fix your file. For more information, see https://github.com/ladisk/pyuff/issues/61')
else:
values = []
split_data = block_data.decode('utf-8', errors='replace').splitlines(True)[13:]
Expand Down Expand Up @@ -1484,3 +1487,66 @@ def prepare_58(


return dataset


def _merge_closing_marker(data_line, marker_line):
    """Return ``data_line`` with the closing marker appended on the same line.

    The line terminator of ``data_line`` is dropped and replaced by the
    complete terminator of ``marker_line`` (CRLF, LF, CR, or nothing when the
    marker is the unterminated last line of the file).
    """
    # Take everything after the marker's content, so a two-byte CRLF is kept
    # whole and an unterminated marker contributes no stray characters.
    terminator = marker_line[len(marker_line.rstrip(b'\r\n')):]
    # NOTE(review): the marker is spelled exactly as it appears in the reviewed
    # text; confirm the number of leading spaces matches what the reader expects.
    return data_line.rstrip(b'\r\n') + b' -1' + terminator


def fix_58b(filename, fixed_filename=None):
    """
    Open a UFF file, fix a common formatting issue and save the fixed file.

    The issue fixed is the case where the closing ' -1' of a dataset sits on
    its own line instead of directly after the data. Two patterns are handled:

    1. the last line of the file is a lone ' -1': it is merged into the
       previous line;
    2. a data line is followed by two consecutive ' -1' lines and at least one
       more line: the first ' -1' is merged into the data line.

    The original file is never modified; the result is written to a new file
    and its name is printed.

    :param filename: filename of the UFF file to be fixed
    :param fixed_filename: filename to write the fixed UFF file, if None, the
        fixed file will be saved as 'filename_fixed.uff' (``_fixed`` is
        inserted before the original extension)
    :raises Exception: if ``filename`` does not exist, cannot be read, or the
        fixed file cannot be written
    """
    if not os.path.exists(filename):
        raise Exception('Filename does not exist')

    try:
        # Binary mode: the file content is handled as raw bytes throughout.
        with open(filename, 'rb') as fh:
            data = fh.read()
    except OSError as e:
        raise Exception(f'Cannot access the file {filename}: {e}') from e

    lines = data.splitlines(keepends=True)

    # Fix 1: a lone ' -1' on the last line is moved up to the end of the
    # previous line. A file consisting of a single line is left untouched.
    if len(lines) >= 2 and lines[-1].strip() == b'-1':
        lines[-2] = _merge_closing_marker(lines[-2], lines[-1])
        lines.pop()

    # Fix 2: 'data\n -1\n -1\n data' patterns. The first ' -1' is merged into
    # the data line, the second one stays on its own line.
    i = 0
    while i < len(lines) - 3:
        if lines[i + 1].strip() == b'-1' and lines[i + 2].strip() == b'-1':
            lines[i] = _merge_closing_marker(lines[i], lines[i + 1])
            del lines[i + 1]
            # i is deliberately not advanced: the merged line is re-checked
            # against the lines that now follow it.
        else:
            i += 1

    if fixed_filename is None:
        base, ext = os.path.splitext(filename)
        new_filename = f"{base}_fixed{ext}"  # default filename
    else:
        new_filename = fixed_filename  # custom filename

    try:
        with open(new_filename, 'wb') as fh:
            fh.write(b''.join(lines))
    except OSError as e:
        raise Exception(f'Error fixing UFF file: {filename}: {e}') from e

    print('fixed file saved as:', new_filename)

9 changes: 5 additions & 4 deletions pyuff/pyuff.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
from .datasets.dataset_15 import _write15, _extract15, get_structure_15
from .datasets.dataset_18 import _extract18, get_structure_18
from .datasets.dataset_55 import _write55, _extract55, get_structure_55
from .datasets.dataset_58 import _write58, _extract58, get_structure_58
from .datasets.dataset_58 import _write58, _extract58, get_structure_58, fix_58b
from .datasets.dataset_82 import _write82, _extract82, get_structure_82
from .datasets.dataset_151 import _write151, _extract151, get_structure_151
from .datasets.dataset_164 import _write164, _extract164, get_structure_164
Expand Down Expand Up @@ -259,9 +259,10 @@ def read_sets(self, setn=None):
for ii in read_range:
dset.append(self._read_set(ii))
except Exception as msg:
raise Exception('Error when reading ' + str(ii) + '-th data-set: ' + msg.value)
except:
raise Exception('Error when reading data-set(s)')
if hasattr(msg, 'value'):
raise Exception('Error when reading ' + str(ii) + '-th data-set: ' + msg.value)
else:
raise Exception('Error when reading data-set(s).')
if len(dset) == 1:
dset = dset[0]
return dset
Expand Down
14 changes: 14 additions & 0 deletions tests/test_58.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,20 @@ def test_prepare_58():
if x2['type'] != 58:
raise Exception('Not correct type')

def test_fix_58b():
    """Check that a file repaired by ``pyuff.fix_58b`` matches the reference file.

    ``fix_58b`` is called without an explicit output name, so the repaired file
    is expected under the default ``*_fixed.uff`` name. The first data-set of
    that file is compared with the first data-set of the stored
    ``*_fixed_test.uff`` reference file.
    """
    source_path = './data/MPSTD#Set001_2024_10_08_10_27_07.uff'
    fixed_path = './data/MPSTD#Set001_2024_10_08_10_27_07_fixed.uff'
    reference_path = './data/MPSTD#Set001_2024_10_08_10_27_07_fixed_test.uff'

    try:
        pyuff.fix_58b(source_path)

        corrected_file = pyuff.UFF(fixed_path)
        data_1 = corrected_file.read_sets(0)

        test_file = pyuff.UFF(reference_path)
        data_2 = test_file.read_sets(0)

        np.testing.assert_array_almost_equal(data_1['data'], data_2['data'])
    finally:
        # Remove the generated file even when reading or the comparison fails,
        # so a failed run does not leave it behind in the data directory.
        if os.path.exists(fixed_path):
            os.remove(fixed_path)

if __name__ == '__main__':
test_read_write_read_given_data()

Expand Down

0 comments on commit eaf4f14

Please sign in to comment.