Merge pull request #92 from jasasonc/master

Fixing 58b files with formatting issues
ladisk · Nov 6, 2024 · eaf4f14 · eaf4f14
2 parents ff59540 + 4a1c7b2
commit eaf4f14
Show file tree

Hide file tree

Showing 5 changed files with 96 additions and 15 deletions.
diff --git a/data/MPSTD#Set001_2024_10_08_10_27_07.uff b/data/MPSTD#Set001_2024_10_08_10_27_07.uff
diff --git a/data/MPSTD#Set001_2024_10_08_10_27_07_fixed_test.uff b/data/MPSTD#Set001_2024_10_08_10_27_07_fixed_test.uff
diff --git a/pyuff/datasets/dataset_58.py b/pyuff/datasets/dataset_58.py
@@ -1068,17 +1068,20 @@ def _extract58(block_data):
         # Body
         # split_data = ''.join(split_data[13:])
         if binary:
-            split_data = b''.join(block_data.splitlines(True)[13:])
-            if dset['byte_ordering'] == 1:
-                bo = '<'
-            else:
-                bo = '>'
-            if (dset['ord_data_type'] == 2) or (dset['ord_data_type'] == 5):
-                # single precision - 4 bytes
-                values = np.asarray(struct.unpack('%c%sf' % (bo, int(len(split_data) / 4)), split_data), 'd')
-            else:
-                # double precision - 8 bytes
-                values = np.asarray(struct.unpack('%c%sd' % (bo, int(len(split_data) / 8)), split_data), 'd')
+            try:     
+                split_data = b''.join(block_data.splitlines(True)[13:])
+                if dset['byte_ordering'] == 1:
+                    bo = '<'
+                else:
+                    bo = '>'
+                if (dset['ord_data_type'] == 2) or (dset['ord_data_type'] == 5):
+                    # single precision - 4 bytes
+                    values = np.asarray(struct.unpack('%c%sf' % (bo, int(len(split_data) / 4)), split_data), 'd')
+                else:
+                    # double precision - 8 bytes
+                    values = np.asarray(struct.unpack('%c%sd' % (bo, int(len(split_data) / 8)), split_data), 'd')
+            except:
+                raise Exception('Potentially wrong data format (common with binary files from some commercial softwares). Try using pyuff.fix_58b() to fix your file. For more information, see https://github.com/ladisk/pyuff/issues/61')
         else:
             values = []
             split_data = block_data.decode('utf-8', errors='replace').splitlines(True)[13:]
@@ -1484,3 +1487,66 @@ def prepare_58(
 
 
     return dataset
+
+
+def fix_58b(filename,fixed_filename=None):
+    """
+    Opens the UFF file, fixes a common formatting issue and saves the fixed file.
+    Specifically, it fixes the instance, when closing '    -1' of the dataset is on its own line, and not right after the data.
+    The moved '    -1' keeps the line ending of the line it was taken from.
+
+    :param filename: filename of the UFF file to be fixed
+    :param fixed_filename: filename to write the fixed UFF file, if None, the fixed file will be saved as 'filename_fixed.uff'
+    :raises Exception: if the file does not exist, cannot be read, or fixing/writing fails
+    """
+
+    def _eol(line):
+        # Terminator of a line as returned by splitlines(keepends=True);
+        # b'' when the file does not end with a newline.
+        return line[len(line.rstrip(b'\r\n')):]
+
+    if not os.path.exists(filename):
+        raise Exception('Filename does not exist')
+    try:
+        # Open the file in binary read mode
+        with open(filename, 'rb') as fh:
+            data = fh.read()
+    except Exception as e:
+        raise Exception(f'Cannot access the file {filename}: {e}') from e
+
+    try:
+        lines = data.splitlines(keepends=True)
+
+        # Fix 1: Adjust ending '    -1' line by moving it to the end of the previous line.
+        # With a single line there is no previous line to attach to, so nothing is done.
+        if len(lines) >= 2 and lines[-1].strip() == b'-1':
+            closing = lines.pop()
+            lines[-1] = lines[-1].rstrip(b'\r\n') + b'    -1' + _eol(closing)
+
+        # Fix 2: Adjust 'data\n    -1\n    -1\n data' patterns
+        i = 0
+        while i < len(lines) - 3:
+            if lines[i+1].strip() == b'-1' and lines[i+2].strip() == b'-1':
+                # Move '    -1' from lines[i+1] to the end of lines[i]
+                lines[i] = lines[i].rstrip(b'\r\n') + b'    -1' + _eol(lines[i+1])
+                del lines[i+1]  # Remove the line that was merged into lines[i]
+                # Do not increment i to recheck the new line at position i
+            else:
+                i += 1  # Move to the next line
+
+        # Reassemble the data
+        data = b''.join(lines)
+
+        # Write the fixed data to the target file
+        if fixed_filename is None:
+            base, ext = os.path.splitext(filename)
+            new_filename = f"{base}_fixed{ext}" #default filename
+        else:
+            new_filename = fixed_filename #custom filename
+        with open(new_filename, 'wb') as fh:
+            fh.write(data)
+        print('fixed file saved as:', new_filename)
+    except Exception as e:
+        # Chain the original error so the traceback shows the real cause
+        raise Exception(f'Error fixing UFF file: {filename}: {e}') from e
+
diff --git a/pyuff/pyuff.py b/pyuff/pyuff.py
@@ -49,7 +49,7 @@
 from .datasets.dataset_15 import _write15, _extract15, get_structure_15
 from .datasets.dataset_18 import _extract18, get_structure_18
 from .datasets.dataset_55 import _write55, _extract55, get_structure_55
-from .datasets.dataset_58 import _write58, _extract58, get_structure_58
+from .datasets.dataset_58 import _write58, _extract58, get_structure_58, fix_58b
 from .datasets.dataset_82 import _write82, _extract82, get_structure_82
 from .datasets.dataset_151 import _write151, _extract151, get_structure_151
 from .datasets.dataset_164 import _write164, _extract164, get_structure_164
@@ -259,9 +259,10 @@ def read_sets(self, setn=None):
             for ii in read_range:
                 dset.append(self._read_set(ii))
         except Exception as msg:
-            raise Exception('Error when reading ' + str(ii) + '-th data-set: ' + msg.value)
-        except:
-            raise Exception('Error when reading data-set(s)')
+            if hasattr(msg, 'value'):
+                raise Exception('Error when reading ' + str(ii) + '-th data-set: ' + msg.value)
+            else:
+                raise Exception('Error when reading data-set(s).')
         if len(dset) == 1:
             dset = dset[0]
         return dset

diff --git a/tests/test_58.py b/tests/test_58.py
@@ -206,6 +206,20 @@ def test_prepare_58():
     if x2['type'] != 58:
         raise Exception('Not correct type')
 
+def test_fix_58b():
+    """Data read from the fix_58b output must match the data of the reference fixed file."""
+    fixed_path = './data/MPSTD#Set001_2024_10_08_10_27_07_fixed.uff'
+    try:
+        pyuff.fix_58b('./data/MPSTD#Set001_2024_10_08_10_27_07.uff')
+        data_1 = pyuff.UFF(fixed_path).read_sets(0)
+        test_file = pyuff.UFF('./data/MPSTD#Set001_2024_10_08_10_27_07_fixed_test.uff')
+        data_2 = test_file.read_sets(0)
+        np.testing.assert_array_almost_equal(data_1['data'], data_2['data'])
+    finally:
+        # remove the fixed file even when a step above fails
+        if os.path.exists(fixed_path):
+            os.remove(fixed_path)
+
 if __name__ == '__main__':
     test_read_write_read_given_data()