Skip to content

Commit

Permalink
fix polymer molformula
Browse files Browse the repository at this point in the history
  • Loading branch information
Eloy Felix committed Feb 16, 2024
1 parent 9117ba8 commit edc266c
Showing 1 changed file with 53 additions and 17 deletions.
70 changes: 53 additions & 17 deletions libRDChEBI/descriptors.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,35 +194,71 @@ def get_polymer_formula(molfile):
mol = update_mol_valences(mol)
rwmol = Chem.RWMol(mol)
formulas = []
atoms_in_sgroups = []
atoms_to_remove = []

for sg in Chem.GetMolSubstanceGroups(rwmol):
sub_mol = Chem.RWMol()
# we only need the atoms (with their valences) in SGroups for the formula
for atm in sg.GetAtoms():
atom = rwmol.GetAtomWithIdx(atm)
sub_mol.AddAtom(atom)
atoms_in_sgroups.append(atm)
sg_props = sg.GetPropsAsDict()
if sg_props["TYPE"] in ("SUP", "MUL"):
continue

formula = _get_frag_formula(sub_mol)
if sg.HasProp("LABEL"):
label = sg.GetProp("LABEL")
atoms_in_sgroup = [at_idx for at_idx in sg.GetAtoms()]
atoms_to_remove += atoms_in_sgroup
sg_sub_mol = Chem.RWMol()

for at_idx in atoms_in_sgroup:
atom = rwmol.GetAtomWithIdx(at_idx)
sg_sub_mol.AddAtom(atom)

sg_formula = _get_frag_formula(sg_sub_mol)
conn_atoms = get_conn_atoms(mol, atoms_in_sgroup[0])
remain_formula = ""

if len(conn_atoms) > len(atoms_in_sgroup):
sub_mol_conn = Chem.RWMol()
for at_idx in set(conn_atoms) - set(atoms_in_sgroup):
atom = rwmol.GetAtomWithIdx(at_idx)
sub_mol_conn.AddAtom(atom)
atoms_to_remove.append(at_idx)
remain_formula = _get_frag_formula(sub_mol_conn)

label = sg.GetProp("LABEL") if sg.HasProp("LABEL") else ""

if remain_formula:
formula = f"({sg_formula}){label}{remain_formula}"
else:
label = ""
formula = f"({formula}){label}"
formula = f"({sg_formula}){label}"
formulas.append(formula)

# calc formula for the rest of atoms
rwmol.BeginBatchEdit()
for atm in atoms_in_sgroups:
for atm in atoms_to_remove:
rwmol.RemoveAtom(atm)
rwmol.CommitBatchEdit()
rest_formula = _get_frag_formula(rwmol)

if rest_formula:
formulas.append(rest_formula)
frags = Chem.GetMolFrags(rwmol, asMols=True, sanitizeFrags=False)
remain_formulas = [_get_frag_formula(frag) for frag in frags]
if remain_formulas:
formulas += remain_formulas
return ".".join(formulas)


def get_conn_atoms(mol, atom_idx):
connected_atoms = set()
queue = [atom_idx]
visited = set()

while queue:
current_atom_idx = queue.pop(0)
if current_atom_idx not in visited:
visited.add(current_atom_idx)
connected_atoms.add(current_atom_idx)
neighbors = mol.GetAtomWithIdx(current_atom_idx).GetNeighbors()
for neighbor in neighbors:
neighbor_idx = neighbor.GetIdx()
if neighbor_idx not in visited:
queue.append(neighbor_idx)
return connected_atoms


def get_polymer_mass(molfile, avg=True):
if avg:
func = Descriptors.MolWt
Expand Down

0 comments on commit edc266c

Please sign in to comment.