Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

NF(WiP): SpecObject .find and .__iadd__ #418

Draft
wants to merge 33 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
0325398
Dummy empty commit to facilitate long lived PR
yarikoptic Jan 17, 2019
d822155
Merge branch 'master' into nf-diff
chaselgrove Feb 7, 2019
1d33124
Merge branch 'master' into nf-diff
chaselgrove Feb 12, 2019
2fe2198
WIP: added venv to diff
chaselgrove Feb 20, 2019
f0548ce
Merge branch 'master' into rf-diff
chaselgrove Feb 21, 2019
0e555da
Merge branch 'master' into rf-diff
chaselgrove Mar 7, 2019
4f84955
Merge branch 'master' into rf-diff
chaselgrove Mar 14, 2019
ec5f058
Merge branch 'master' into rf-diff
chaselgrove Mar 20, 2019
66d8b67
replacing SpecObject._comparison_fields with _diff_cmp_fields + _diff…
chaselgrove Mar 20, 2019
3ab7a04
refactored _satisfied_by() and _identical_to() in SpecObject
chaselgrove Mar 21, 2019
9599683
added class SpecDiff
chaselgrove Mar 21, 2019
eaf140d
added _collection_attribute to CondaDistribution, GitDistribution, SV…
chaselgrove Apr 2, 2019
029dd3d
using SpecDiff for diff interface
chaselgrove Apr 2, 2019
1ac7a92
Merge branch 'master' into rf-diff
chaselgrove Apr 2, 2019
17525c7
supporting files lists in SpecDiff
chaselgrove Apr 16, 2019
138c2a6
BF: in SpecDiff, checking for collection attribute in specobject clas…
chaselgrove Apr 16, 2019
39f5f4e
added SpecDiff attributes a_only, b_only, and diffs
chaselgrove Apr 16, 2019
cf5db61
in SpecDiff:
chaselgrove Apr 17, 2019
df6d69d
added venv hierarchy to diff
chaselgrove Apr 17, 2019
c502961
Merge branch 'master' into rf-diff
chaselgrove Apr 17, 2019
f0329c9
Merge branch 'master' into rf-diff
chaselgrove Apr 18, 2019
6e9178f
NF(WiP): SpecObject .find and .__iadd__
yarikoptic May 16, 2019
85cae7f
temp comments
yarikoptic May 16, 2019
c4eb4fb
BF(TST): establish FakeDistribution to mock test our distributions di…
yarikoptic May 23, 2019
47a4661
Merge remote-tracking branch 'origin/master' into enh-find-extend-specs
yarikoptic May 30, 2019
0d1209e
NF: cfg retrace.only_with_files to make possible including packages …
yarikoptic May 30, 2019
14065c7
ENH: -t, --tracer option for retrace to be able to choose which ones …
yarikoptic May 30, 2019
d6bf28b
BF: tracer_classes is a dict now, go through .values
yarikoptic Sep 12, 2019
e64f317
BF: give a fake tracer a unique index
yarikoptic Sep 12, 2019
b1a811d
Merge remote-tracking branch 'origin/master' into enh-find-extend-specs
yarikoptic Sep 12, 2019
db188c2
BF: if there is no value, we just assign it, not try to append
yarikoptic Sep 12, 2019
ba9cd20
renamed SpecObject._cmp_id to _diff_id (for consistency)
chaselgrove Oct 9, 2019
3fffc18
Merge branch 'master' into enh-find-extend-specs
chaselgrove Nov 5, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions reproman.cfg
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
[general]
inventory_file = ~/inventory.yml

[retrace]
# Report only the "packages" which have associated files.
# It is cheap to gather information on all packages in some environments
# (conda, virtualenv, etc.), so by default we report all of them.
only_with_files = false

[aws]
access_key_id = AWS_ACCESS_KEY
secret_access_key = AWS_SECRET_ACCESS_KEY
Expand Down
262 changes: 235 additions & 27 deletions reproman/distributions/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,11 @@ class SpecObject(object):
# needed (or make sure the trivial case is handled)

# Fields used to establish the "identity" of the specobject for the
# purposes of diff
# purposes of finding the one and diff
# TODO: rename to e.g. _id_fields?
_diff_cmp_fields = tuple()
# Fields of the primary interest when showing diff
_diff_fields = tuple()
# Fields used in determination of comparison (satisfied_by and identical_to)
_comparison_fields = tuple()

@property
def _diff_cmp_id(self):
Expand All @@ -59,20 +58,15 @@ def _diff_cmp_id(self):
return tuple(getattr(self, a) for a in self._diff_cmp_fields)

@property
def _cmp_id(self):
if not self._comparison_fields:
# Might need to be gone or some custom exception
raise RuntimeError(
"Cannot establish identity of %r since _comaprison_fields "
"are not defined" % self)
return tuple(getattr(self, a) for a in self._comparison_fields)
def _diff_id(self):
return self._diff_cmp_id + self._diff_vals

@property
def _diff_vals(self):
"""gives the values of the attributes defined by _diff_fields (like
_diff_cmp_id for _diff_cmp_fields)
"""
return tuple(str(getattr(self, a)) for a in self._diff_fields)
return tuple(getattr(self, a) for a in self._diff_fields)

@property
def diff_identity_string(self):
Expand All @@ -94,10 +88,10 @@ def diff_subidentity_string(self):

@property
def identity_string(self):
"""like diff_identity_string, but for _comparison_fields (used in
satisfied_by comparisons)
"""like diff_identity_string, but for both _diff_cmp_fields and
_diff_fields (used in satisfied_by comparisons)
"""
return " ".join(str(el) for el in self._cmp_id if el is not None)
return " ".join(str(el) for el in self._diff_id if el is not None)

# TODO: make it "lazy" or may be there is already a helper in attrs?
@property
Expand Down Expand Up @@ -137,15 +131,15 @@ def _satisfied_by(self, other):
spec object.

We require that the values of the attributes given by
_comparison_fields are the same. A specobject with a value of None
for one of these attributes is less specific than one with
a specific value; the former cannot satisfy the latter,
but the latter can satisfy the former.
_diff_cmp_fields and _diff_fields are the same. A specobject
with a value of None for one of these attributes is less specific
than one with a specific value; the former cannot satisfy the
latter, but the latter can satisfy the former.

TODO: Ensure we don't encounter the case where self is completely
unspecified (all values are None), in which case satisfied_by()
returns True by default. Perhaps this is done by making
sure that at least one of the _comparison_fields cannot be None.
sure that at least one of the _diff_cmp_fields cannot be None.

TODO: derive _collection_type directly from _collection. This isn't
possible at the moment because DebianDistribution.packages is
Expand All @@ -162,9 +156,7 @@ def _satisfied_by(self, other):
raise TypeError('don''t know how to determine if a %s is satisfied by a %s' % (self.__class__, other_collection_type))
if not isinstance(other, self.__class__):
raise TypeError('incompatible specobject types')
for attr_name in self._comparison_fields:
self_value = getattr(self, attr_name)
other_value = getattr(other, attr_name)
for (self_value, other_value) in zip(self._diff_id, other._diff_id):
if self_value is None:
continue
if self_value != other_value:
Expand All @@ -176,14 +168,125 @@ def _identical_to(self, other):
"""Determine if the other object is identical to the spec object.

We require that the objects are of the same type and that the
values of the attributes given by _comparison_fields are the same.
values of the attributes given by _diff_cmp_fields and _diff_fields
(dereferenced in _diff_id) are the same.
"""
if not isinstance(other, self.__class__):
return False
for attr_name in self._comparison_fields:
if getattr(self, attr_name) != getattr(other, attr_name):
return False
return True
return all(sv==ov for sv, ov in zip(self._diff_id, other._diff_id))

def _find_attr(self, child):
    """Find the list-valued attribute of self which might contain the child

    The decision is made purely on types: an attribute qualifies if its
    ``metadata['type']`` is a (super)class of the child's class and its
    default is a factory producing a ``list``.

    Parameters
    ----------
    child : SpecObject
      Object for which a containing attribute is sought

    Returns
    -------
    The matching attribute (an entry of ``self.__attrs_attrs__``), or None
    if no appropriate attribute is found

    Raises
    ------
    ValueError
      If more than one attribute could contain instances of the child's
      class
    """
    child_class = child.__class__
    assert issubclass(child_class, SpecObject)  # only those are supported ATM

    def _may_contain(candidate):
        """Tell whether `candidate` is a list attribute typed for the child"""
        metadata_type = candidate.metadata.get('type', None)
        if not metadata_type or not issubclass(child_class, metadata_type):
            return False
        # Attributes without a factory default (plain value or no default at
        # all) have no `.factory`, and a factory can be an arbitrary
        # callable rather than a class -- neither describes a list, so they
        # are skipped instead of blowing up inside issubclass
        factory = getattr(candidate.default, 'factory', None)
        return isinstance(factory, type) and issubclass(factory, list)

    # For paranoids - do exhaustive search
    # TODO: cache mapping from child_class to known types
    compatible_attrs = [
        candidate for candidate in self.__attrs_attrs__
        if _may_contain(candidate)
    ]

    if not compatible_attrs:
        return None

    # If multiple provide list of the same class -- blow for now?
    if len(compatible_attrs) > 1:
        raise ValueError(
            "Multiple attributes seems to contain instances of %s: %s "
            "ATM we are not supporting that."
            % (child_class, compatible_attrs)
        )
    return compatible_attrs[0]

def _find_in_attr(self, child, attr):
    """Given an attribute (should be containing an iterable) find a child

    Items are matched by comparing their ``_diff_cmp_id`` with the one of
    the child.  No checks for attribute appropriateness (type check etc)
    for the child is carried out.

    Parameters
    ----------
    child
      Object to look for; must provide ``_diff_cmp_id``
    attr
      Attribute description providing ``.name`` of the attribute of self
      to search in.  If it evaluates to False, nothing is searched

    Returns
    -------
    The single item with the same ``_diff_cmp_id`` as the child, or None if
    there is none (or no attribute was given)

    Raises
    ------
    RuntimeError
      If more than one item matches
    """
    if not attr:
        return None  # Found no attribute which might have contained it

    child_id = child._diff_cmp_id
    values = getattr(self, attr.name)  # TODO: isn't there a better way?
    hits = [v for v in values if v._diff_cmp_id == child_id]

    # checks
    # TODO: parametrize etc to generate/allow for multiple hits?
    if not hits:
        return None
    if len(hits) > 1:
        # The message must be interpolated explicitly: exceptions do not
        # apply %-formatting to their arguments the way logging calls do
        raise RuntimeError(
            "Found multiple hits for %s in %s: %s . ATM we expect only a "
            "single one"
            % (child, attr.name, hits)
        )
    return hits[0]

def find(self, child):
    """Find an object among TypedList attrs which matches (identity-wise)

    The attribute to look in is chosen by ``_find_attr`` and the actual
    lookup is delegated to ``_find_in_attr``.  Returns None if there is no
    suitable attribute or no matching object in it.
    """
    container = self._find_attr(child)
    return self._find_in_attr(child, container) if container else None

def __iadd__(self, other):
    """Add information from another object into this one

    Attributes whose values are equal in both objects are left alone.
    For attributes which are lists of spec objects, every item of the
    other object is looked up (via ``_find_in_attr``) in this one: items
    which are not found get appended, found ones get the other item
    ``+=``'ed into them.

    Parameters
    ----------
    other
      Object of exactly the same class as self

    Returns
    -------
    self, as required by the in-place operator protocol (otherwise
    ``x += y`` would rebind ``x`` to None)

    Raises
    ------
    NotImplementedError
      If a differing attribute is not a list of spec objects
    """
    # Check the other one is of the same kind
    assert isinstance(other, self.__class__)
    # TEMP -- to decide what to do when subclass. For now take only
    # exactly the same
    assert other.__class__ is self.__class__
    # TODO: consider being able to add objects this class knows about.
    #  e.g. Adding DebianPackage's to .packages, or DebianDistribution
    #  to .distributions.  That might eliminate if/then/else logic in
    #  retrace
    # go through attrs, and if the other one defines a single attr which
    # is not defined here -- blow
    for a in self.__attrs_attrs__:
        a_self = getattr(self, a.name)
        a_other = getattr(other, a.name)
        if a_self == a_other:
            # All good and nothing for us to do here
            continue
        is_list_attr = (
            isinstance(a.default, attr.Factory) and a.default.factory is list
        )
        if not is_list_attr:
            raise NotImplementedError("I think")
        # we have a list of things...
        if not issubclass(a.metadata['type'], SpecObject):
            raise NotImplementedError(
                "For now joining only lists of our own spec objects"
            )
        # we might know what to do
        for a_other_value in a_other:
            a_self_value = self._find_in_attr(a_other_value, a)
            if a_self_value is None:
                # a new one!  Append so the items we already have are
                # retained instead of being replaced by a one-item list
                a_self.append(a_other_value)
            else:
                # Delegate doing the right thing to the child's __iadd__
                a_self_value += a_other_value
    return self



def _register_with_representer(cls):
Expand All @@ -210,6 +313,13 @@ class Distribution(SpecObject, metaclass=abc.ABCMeta):
# name and looks awkward
name = attrib(default=attr.NOTHING)

# Distributions are typically a single entity on a system, although in
# some cases multiple ones (e.g. several conda installations) could have
# been used.
# So by default they are identified based on the name alone
_diff_cmp_fields = ('name',)


@staticmethod
def factory(distribution_type, provenance=None):
"""
Expand Down Expand Up @@ -423,3 +533,101 @@ def _create_package(self, **package_fields):
provided by _get_packagefields_for_files
"""
return


class SpecDiff:

    """Difference object for SpecObjects.

    Instantiate with SpecDiff(a, b).  a and b must be of the same type
    or TypeError is raised.  Either (but not both) may be None.

    Attributes:

        a, b: The two objects being compared.

        cls: The type of the objects being compared (taken from whichever
            of a and b is not None).

    If _diff_cmp_fields is defined for the SpecObjects:

        diff_cmp_id: The _diff_cmp_id of the two objects.  If
            _diff_cmp_id are different for the two objects,
            they cannot be compared and ValueError is raised.

        diff_vals_a, diff_vals_b: _diff_vals for a and b, respectively,
            or None if a or b is None.

    For collection SpecObjects (e.g. DebianDistribution, containing
    DEBPackages; these have _collection_attribute defined), we also
    have:

        collection: a list of SpecDiff objects for the contained
            SpecObjects.

        a_only: SpecDiff objects from collection that only appear in the
            first passed SpecObject

        b_only: SpecDiff objects from collection that only appear in the
            second passed SpecObject

        a_and_b: SpecDiff objects in collection that appear in both
            passed SpecObjects

    If a and b are lists, they are treated as files specifications, and
    self.collection is a list of (fname_a, fname_b) tuples.
    TODO: give files and file collections their own specobjects
    """

    def __init__(self, a, b):
        allowed_types = (SpecObject, list, type(None))
        if not isinstance(a, allowed_types) \
                or not isinstance(b, allowed_types):
            raise TypeError('objects must be SpecObjects or None')
        # Presence is tested with `is None` throughout: an empty list of
        # files is a valid (empty) specification, not a missing one
        if a is None and b is None:
            raise TypeError('objects cannot both be None')
        if a is not None and b is not None and type(a) != type(b):
            raise TypeError('objects must be of the same type')
        self.cls = type(a) if a is not None else type(b)
        self.a = a
        self.b = b
        if self.cls is list:
            self._init_files_collection()
        else:
            self._init_specobject_diff()
        if hasattr(self, 'collection'):
            self._split_collection()

    def _init_files_collection(self):
        """Pair up file names of two file lists into self.collection"""
        # A missing side contributes no files
        a_files = set(self.a) if self.a is not None else set()
        b_files = set(self.b) if self.b is not None else set()
        self.collection = [
            (
                fname if fname in a_files else None,
                fname if fname in b_files else None,
            )
            for fname in a_files | b_files
        ]

    def _init_specobject_diff(self):
        """Record identity/values and diff the contained SpecObjects"""
        a, b = self.a, self.b
        if self.cls._diff_cmp_fields:
            if a is not None and b is not None \
                    and a._diff_cmp_id != b._diff_cmp_id:
                raise ValueError('objects\' _diff_cmp_id differ')
            self.diff_vals_a = a._diff_vals if a is not None else None
            self.diff_vals_b = b._diff_vals if b is not None else None
            self.diff_cmp_id = \
                a._diff_cmp_id if a is not None else b._diff_cmp_id
        if hasattr(self.cls, '_collection_attribute'):
            # Index contained objects by identity so that the ones present
            # on both sides end up in the same SpecDiff
            a_collection = {c._diff_cmp_id: c for c in a.collection} \
                if a is not None else {}
            b_collection = {c._diff_cmp_id: c for c in b.collection} \
                if b is not None else {}
            self.collection = [
                SpecDiff(a_collection.get(cmp_id), b_collection.get(cmp_id))
                for cmp_id in set(a_collection).union(b_collection)
            ]

    def _split_collection(self):
        """Sort self.collection into a_only, b_only and a_and_b"""
        self.a_only = []
        self.b_only = []
        self.a_and_b = []
        for pkg_diff in self.collection:
            # Entries are (fname_a, fname_b) tuples for files and SpecDiff
            # objects otherwise
            if isinstance(pkg_diff, tuple):
                (a, b) = pkg_diff
            else:
                a = pkg_diff.a
                b = pkg_diff.b
            if a is None:
                self.b_only.append(pkg_diff)
            elif b is None:
                self.a_only.append(pkg_diff)
            else:
                self.a_and_b.append(pkg_diff)
1 change: 1 addition & 0 deletions reproman/distributions/conda.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ class CondaDistribution(Distribution):
environments = TypedList(CondaEnvironment)

_cmp_field = ('path',)
_collection_attribute = 'packages'

def initiate(self, environment):
"""
Expand Down
5 changes: 4 additions & 1 deletion reproman/distributions/debian.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,10 @@ class APTSource(SpecObject):
site = attrib()
archive_uri = attrib()
date = attrib()

# name contains a suffix so we probably should not use it
_diff_cmp_fields = ('origin', 'codename', 'component', 'architecture')

_register_with_representer(APTSource)


Expand All @@ -89,7 +93,6 @@ class DEBPackage(Package):

_diff_cmp_fields = ('name', 'architecture')
_diff_fields = ('version',)
_comparison_fields = ('name', 'architecture', 'version')

_register_with_representer(DEBPackage)

Expand Down
3 changes: 2 additions & 1 deletion reproman/distributions/redhat.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@ class RPMPackage(Package):
vendor = attrib()
url = attrib()
files = attrib(default=attr.Factory(list), hash=False)
_comparison_fields = ('name', 'version', 'architecture')
_diff_cmp_fields = ('name', 'architecture')
_diff_fields = ('version',)


_register_with_representer(RPMPackage)
Expand Down
4 changes: 4 additions & 0 deletions reproman/distributions/vcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,8 @@ class GitDistribution(VCSDistribution):
_cmd = "git"
packages = TypedList(GitRepo)

_collection_attribute = 'packages'

def initiate(self, session=None):
pass

Expand Down Expand Up @@ -294,6 +296,8 @@ class SVNDistribution(VCSDistribution):
_cmd = "svn"
packages = TypedList(SVNRepo)

_collection_attribute = 'packages'

def install_packages(self, session, use_version=True):
raise NotImplementedError
SVNRepo._distribution = SVNDistribution
Expand Down
Loading