Skip to content

Commit

Permalink
Introduce Guesser classes for different contexts (#3753)
Browse files Browse the repository at this point in the history
* Adds Guesser classes (GuesserBase and DefaultGuesser)
* Adds guess_TopologyAttrs method to Universe
* Modifies all Topology parsers to remove guessing in parsing and move it to Universe creation
  • Loading branch information
aya9aladdin authored Oct 19, 2024
1 parent 05876f2 commit 9b69745
Show file tree
Hide file tree
Showing 92 changed files with 2,160 additions and 1,190 deletions.
4 changes: 2 additions & 2 deletions benchmarks/benchmarks/topology.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import MDAnalysis
import numpy as np
from MDAnalysis.topology import guessers
from MDAnalysis.guesser import DefaultGuesser

try:
from MDAnalysisTests.datafiles import GRO
Expand All @@ -26,7 +26,7 @@ def setup(self, num_atoms):

def time_guessbonds(self, num_atoms):
"""Benchmark for guessing bonds"""
guessers.guess_bonds(self.ag, self.ag.positions,
DefaultGuesser(None).guess_bonds(self.ag, self.ag.positions,
box=self.ag.dimensions,
vdwradii=self.vdwradii)

Expand Down
17 changes: 15 additions & 2 deletions package/CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,14 @@ The rules for this file:
-------------------------------------------------------------------------------
??/??/?? IAlibay, HeetVekariya, marinegor, lilyminium, RMeli,
ljwoods2, aditya292002, pstaerk, PicoCentauri, BFedder,
tyler.je.reddy, SampurnaM, leonwehrhan, kainszs, orionarcher,
tyler.je.reddy, SampurnaM, leonwehrhan, kainszs, orionarcher,
yuxuanzhuang, PythonFZ, laksh-krishna-sharma, orbeckst, MattTDavies,
talagayev
talagayev, aya9aladdin

* 2.8.0

Fixes
* Fix Bohrium (Bh) atomic mass in tables.py (PR #3753)
* set `n_parts` to the total number of frames being analyzed if
`n_parts` is bigger. (Issue #4685)
* Catch higher dimensional indexing in GroupBase & ComponentBase (Issue #4647)
Expand Down Expand Up @@ -56,6 +57,15 @@ Fixes
* Fix groups.py doctests using sphinx directives (Issue #3925, PR #4374)

Enhancements
* Removed type and mass guessing from all topology parsers (PR #3753)
* Added guess_TopologyAttrs() API to the Universe to handle attribute
guessing (PR #3753)
* Added the DefaultGuesser class, which is a general-purpose guesser with
the same functionalities as the existing guesser.py methods (PR #3753)
* Added are_values_missing() to `TopologyAttrs` to check for missing
values (PR #3753)
* Added guessed `Element` attribute to the ITPParser to preserve old mass
partial guessing behavior from being broken (PR #3753)
* MDAnalysis now supports Python 3.13 (PR #4732)
* Introduce parallelization API to `AnalysisBase` and to `analysis.rms.RMSD` class
(Issue #4158, PR #4304)
Expand Down Expand Up @@ -100,6 +110,9 @@ Changes
numpy.testing.assert_allclose #4438)

Deprecations
* Unknown masses are set to 0.0 for the current version; this will be deprecated
in version 3.0.0 and replaced by :class:`Masses`' missing_value_label attribute (np.nan)
(PR #3753)
* The MDAnalysis.analysis.encore module has been deprecated in favour of the
mdaencore MDAKit and will be removed in version 3.0.0 (PR #4737)
* The MMTF Reader is deprecated and will be removed in version 3.0
Expand Down
2 changes: 1 addition & 1 deletion package/MDAnalysis/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@
_TOPOLOGY_ATTRS: Dict = {} # {attrname: cls}
_TOPOLOGY_TRANSPLANTS: Dict = {} # {name: [attrname, method, transplant class]}
_TOPOLOGY_ATTRNAMES: Dict = {} # {lower case name w/o _ : name}

_GUESSERS: Dict = {}

# custom exceptions and warnings
from .exceptions import (
Expand Down
2 changes: 1 addition & 1 deletion package/MDAnalysis/analysis/bat.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ class to calculate dihedral angles for a given set of atoms or residues


def _sort_atoms_by_mass(atoms, reverse=False):
r"""Sorts a list of atoms by name and then by index
r"""Sorts a list of atoms by mass and then by index
The atom index is used as a tiebreaker so that the ordering is reproducible.
Expand Down
44 changes: 26 additions & 18 deletions package/MDAnalysis/converters/OpenMMParser.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@
===================================================================
.. versionadded:: 2.0.0
.. versionchanged:: 2.8.0
Removed type and mass guessing (attributes guessing takes place
now through universe.guess_TopologyAttrs() API)
Converts an
Expand Down Expand Up @@ -59,8 +62,7 @@
import warnings

from ..topology.base import TopologyReaderBase
from ..topology.tables import SYMB2Z
from ..topology.guessers import guess_types, guess_masses
from ..guesser.tables import SYMB2Z
from ..core.topology import Topology
from ..core.topologyattrs import (
Atomids,
Expand Down Expand Up @@ -108,11 +110,6 @@ def _mda_topology_from_omm_topology(self, omm_topology):
-------
top : MDAnalysis.core.topology.Topology
Note
----
When none of the elements are present in the openmm topolgy, their
atomtypes are guessed using their names and their masses are
then guessed using their atomtypes.
When partial elements are present, values from available elements
are used whereas the absent elements are assigned an empty string
Expand Down Expand Up @@ -184,21 +181,32 @@ def _mda_topology_from_omm_topology(self, omm_topology):
warnings.warn("Element information missing for some atoms. "
"These have been given an empty element record ")
if any(i == 'X' for i in atomtypes):
warnings.warn("For absent elements, atomtype has been "
"set to 'X' and mass has been set to 0.0. "
"If needed these can be guessed using "
"MDAnalysis.topology.guessers.")
warnings.warn(
"For absent elements, atomtype has been "
"set to 'X' and mass has been set to 0.0. "
"If needed these can be guessed using "
"universe.guess_TopologyAttrs("
"to_guess=['masses', 'types']). "
"(for MDAnalysis version 2.x "
"this is done automatically,"
" but it will be removed in 3.0).")

attrs.append(Elements(np.array(validated_elements,
dtype=object)))

else:
atomtypes = guess_types(atomnames)
masses = guess_masses(atomtypes)
wmsg = ("Element information is missing for all the atoms. "
"Elements attribute will not be populated. "
"Atomtype attribute will be guessed using atom "
"name and mass will be guessed using atomtype."
"See MDAnalysis.topology.guessers.")
wmsg = (
"Element information is missing for all the atoms. "
"Elements attribute will not be populated. "
"Atomtype attribute will be guessed using atom "
"name and mass will be guessed using atomtype."
"For MDAnalysis version 2.x this is done automatically, "
"but it will be removed in MDAnalysis v3.0. "
"These can be guessed using "
"universe.guess_TopologyAttrs("
"to_guess=['masses', 'types']) "
"See MDAnalysis.guessers.")

warnings.warn(wmsg)
else:
attrs.append(Elements(np.array(validated_elements, dtype=object)))
Expand Down
2 changes: 1 addition & 1 deletion package/MDAnalysis/converters/ParmEd.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,12 +81,12 @@
import itertools
import warnings

from ..guesser.tables import SYMB2Z
import numpy as np
from numpy.lib import NumpyVersion

from . import base
from ..coordinates.base import SingleFrameReaderBase
from ..topology.tables import SYMB2Z
from ..core.universe import Universe
from ..exceptions import NoDataError

Expand Down
2 changes: 1 addition & 1 deletion package/MDAnalysis/converters/ParmEdParser.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@
import numpy as np

from ..topology.base import TopologyReaderBase, change_squash
from ..topology.tables import Z2SYMB
from ..guesser.tables import Z2SYMB
from ..core.topologyattrs import (
Atomids,
Atomnames,
Expand Down
4 changes: 1 addition & 3 deletions package/MDAnalysis/converters/RDKit.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,9 +255,7 @@ class RDKitConverter(base.ConverterBase):
from MDAnalysisTests.datafiles import PSF, DCD
from rdkit.Chem.Descriptors3D import Asphericity
u = mda.Universe(PSF, DCD)
elements = mda.topology.guessers.guess_types(u.atoms.names)
u.add_TopologyAttr('elements', elements)
u = mda.Universe(PSF, DCD, to_guess=['elements'])
ag = u.select_atoms("resid 1-10")
for ts in u.trajectory:
Expand Down
16 changes: 9 additions & 7 deletions package/MDAnalysis/converters/RDKitParser.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
#
# MDAnalysis --- https://www.mdanalysis.org
# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors
Expand Down Expand Up @@ -47,7 +47,6 @@
import numpy as np

from ..topology.base import TopologyReaderBase, change_squash
from ..topology import guessers
from ..core.topologyattrs import (
Atomids,
Atomnames,
Expand Down Expand Up @@ -90,16 +89,14 @@ class RDKitParser(TopologyReaderBase):
- Atomnames
- Aromaticities
- Elements
- Types
- Masses
- Bonds
- Resids
- Resnums
- RSChirality
- Segids
Guesses the following:
- Atomtypes
Depending on RDKit's input, the following Attributes might be present:
- Charges
- Resnames
Expand Down Expand Up @@ -156,6 +153,12 @@ class RDKitParser(TopologyReaderBase):
.. versionadded:: 2.0.0
.. versionchanged:: 2.1.0
Added R/S chirality support
.. versionchanged:: 2.8.0
Removed type guessing (attribute guessing now takes place
through the universe.guess_TopologyAttrs() API). If atom types are not
present in the input RDKit molecule as a _TriposAtomType property,
the type attribute gets the same values as the element attribute.
"""
format = 'RDKIT'

Expand Down Expand Up @@ -303,8 +306,7 @@ def parse(self, **kwargs):
if atomtypes:
attrs.append(Atomtypes(np.array(atomtypes, dtype=object)))
else:
atomtypes = guessers.guess_types(names)
attrs.append(Atomtypes(atomtypes, guessed=True))
atomtypes = np.char.upper(elements)

# Partial charges
if charges:
Expand Down
1 change: 0 additions & 1 deletion package/MDAnalysis/coordinates/PDB.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,6 @@
from ..lib.util import store_init_arguments
from . import base
from .timestep import Timestep
from ..topology.core import guess_atom_element
from ..exceptions import NoDataError


Expand Down
23 changes: 10 additions & 13 deletions package/MDAnalysis/core/groups.py
Original file line number Diff line number Diff line change
Expand Up @@ -3453,7 +3453,6 @@ def guess_bonds(self, vdwradii=None, fudge_factor=0.55, lower_bound=0.1):
----------
vdwradii : dict, optional
Dict relating atom types: vdw radii
fudge_factor : float, optional
The factor by which atoms must overlap each other to be considered
a bond. Larger values will increase the number of bonds found. [0.55]
Expand All @@ -3477,8 +3476,8 @@ def guess_bonds(self, vdwradii=None, fudge_factor=0.55, lower_bound=0.1):
Corrected misleading docs, and now allows passing of `fudge_factor`
and `lower_bound` arguments.
"""
from ..topology.core import guess_bonds, guess_angles, guess_dihedrals
from .topologyattrs import Bonds, Angles, Dihedrals
from ..guesser.default_guesser import DefaultGuesser

def get_TopAttr(u, name, cls):
"""either get *name* or create one from *cls*"""
Expand All @@ -3490,22 +3489,20 @@ def get_TopAttr(u, name, cls):
return attr

# indices of bonds
b = guess_bonds(
self.atoms,
self.atoms.positions,
vdwradii=vdwradii,
box=self.dimensions,
fudge_factor=fudge_factor,
lower_bound=lower_bound,
)
bondattr = get_TopAttr(self.universe, "bonds", Bonds)
guesser = DefaultGuesser(None, fudge_factor=fudge_factor,
lower_bound=lower_bound,
box=self.dimensions,
vdwradii=vdwradii)
b = guesser.guess_bonds(self.atoms, self.atoms.positions)

bondattr = get_TopAttr(self.universe, 'bonds', Bonds)
bondattr._add_bonds(b, guessed=True)

a = guess_angles(self.bonds)
a = guesser.guess_angles(self.bonds)
angleattr = get_TopAttr(self.universe, 'angles', Angles)
angleattr._add_bonds(a, guessed=True)

d = guess_dihedrals(self.angles)
d = guesser.guess_dihedrals(self.angles)
diheattr = get_TopAttr(self.universe, 'dihedrals', Dihedrals)
diheattr._add_bonds(d)

Expand Down
13 changes: 13 additions & 0 deletions package/MDAnalysis/core/topologyattrs.py
Original file line number Diff line number Diff line change
Expand Up @@ -518,6 +518,18 @@ def set_segments(self, sg, values):
"""Set segmentattributes for a given SegmentGroup"""
raise NotImplementedError

@classmethod
def are_values_missing(cls, values):
"""check if an attribute has a missing value
.. versionadded:: 2.8.0
"""
missing_value_label = getattr(cls, 'missing_value_label', None)

if missing_value_label is np.nan:
return np.isnan(values)
else:
return values == missing_value_label

# core attributes

Expand Down Expand Up @@ -1441,6 +1453,7 @@ class Masses(AtomAttr):
attrname = 'masses'
singular = 'mass'
per_object = 'atom'
missing_value_label = np.nan
target_classes = [AtomGroup, ResidueGroup, SegmentGroup,
Atom, Residue, Segment]
transplants = defaultdict(list)
Expand Down
Loading

0 comments on commit 9b69745

Please sign in to comment.