def list_iter(self, chunksize=1000, *args, **kwargs):
    """Read items lazily, yielding them as lists of up to `chunksize` items.

    This is a convenient interface for cases when processing a large amount
    of items from a job in one go isn't ideal due to the memory needed.
    Instead, the items are fetched and handed over chunk by chunk: every
    iteration makes one call to :meth:`iter` and yields its results as a
    list.

    You can reduce I/O overhead by increasing `chunksize`, but that also
    increases the memory consumption.

    :param chunksize: maximum size of the list yielded per iteration; must
        be a positive integer.
    :param start: zero-based offset of the first item to read (default 0).
    :param count: overall number of items to be returned, which is broken
        down by `chunksize`. ``None`` (the default) means "no limit".

    Any other positional or keyword argument is forwarded to :meth:`iter`
    unchanged.

    :return: a generator over items, yielding non-empty lists of items.
        Nothing is yielded when there are no items to read.
    :rtype: :class:`collections.Iterable`
    :raises ValueError: if `chunksize` is smaller than 1. As this is a
        generator, the error surfaces when it is first advanced.
    """
    if chunksize < 1:
        raise ValueError("chunksize must be a positive integer")

    start = kwargs.pop("start", 0)
    # ``None`` stands for "unbounded"; otherwise this is the number of items
    # that may still be yielded.
    remaining = kwargs.pop("count", None)

    while remaining is None or remaining > 0:
        # Never ask for more than what is left of the overall `count`.
        limit = chunksize if remaining is None else min(chunksize, remaining)
        next_key = "{}/{}".format(self.key, start)
        items = list(
            self.iter(*args, count=limit, start=next_key, **kwargs))
        if not items:
            # Source exhausted exactly on a chunk boundary (or it was empty):
            # stop instead of yielding a spurious empty list.
            break
        yield items
        if len(items) < limit:
            # A short chunk means there is nothing left to read, so the
            # extra round trip can be skipped.
            break
        start += len(items)
        if remaining is not None:
            remaining -= len(items)
+eJy11ol/HFUdAPAkLQWaBopXKIhsA4lLktljNscmIdK0aUKOZnM1maKP+jL7sm+yuzP7myMXRlvkkGCBVqriSVFUDhFqUatCdcYD7/vW/8XfvNkkbbIlNf24yezsvHkz781739/vzbGKJSgLK9eWlZXNMtPSDB3KB8qVXXis6TYzqWpjmQUVBLaFl2B7UNVk4DDLhmuWYEdY2YEleWZzIw3XKtvxYDg1Ng7XKdvwp2NqcL1yJ/7itl1oj0YTiUjw3xxvT8aSsSgtaFHT0SMzFra9U1w/ZaQXoFIZxJ8F05hhqt3ZFJfj8WSdVdDSzOzklmRj+1JwWIeN087a1v21smzZ1Ga4r010NeAOb6xresYvaO2GXaLznFG8yIKqJbghrOzEksMWM6WuDNNtuJHAbuVmv+EFfCBdslQTO6hnuDMVlSNyJA43UeVGPN+lqqxgSwd11UjjeXgHgXcqVXgis6gVGkNpNp3DnsC7qBieoDa8m8B7xLDUR+uhmorGDxi6zsQow80E9oiyLGMFiea0WQa3BDc4YBhZjcGtBN6rDOCxapnTtpFlutx5UJ+C1NxQ7yFdXxzn2daeI0OZWH5hZHB0FHgh36oaKT01OTCcHW/STHtxcbJrsjsLmS5jsj/LbZaC26hyQ9ARG4dAGmR6xubwPgK3KxVY3toCISpErNQYXygw2EugRtmLpbRQyGkq9Z8gOi/Nzc1J04aZlxwzx/zBYWm4g4qR6XJwSE1tUVSFOxUZy/ZTS1ND9+Wv9O9IC9Q6jnKdQGgVkCaDuiV4f6DQn33HAoR6V1hI8tuH+oG3xMTnmWXRDIMG8VipAWh0OCKQgqrd/nRFCESV2/BokqUbQ3IilFLtkByLt4Xi8fbmZHtzc6j30DjEgqlTpMMFyzYZzUOcgKzchGVpanEpTy2MnaMoZzYOiRKD10SgWdm9bvBEBLRQ0ZtZai5AK4HkxbPfFlgYYybGKrQT6BB31jOaPh+NR+I+z7upuO9Ka6s+Owl8QNzZ9wn3bLC3j0DXenv7qXKN38dczpiDAwS6xTT60d0YSg2P96WGxuAgdXjlEvQUx9/02+pVj/oDr1o7AwE74V5lCE/f7j7uTxx+trkvTtRH+hrvkibcs6PesvfkhHdqBPfLKW95sNwc3OWe2FUleee8E5L3epV7eqYcr/IueBe2lTVU70jU4hH0iTYxzDQ9Dv2PwMAoDFqOswSHwhwz01CYX5SOUvx65e61NGTZhokWIrP5yEUhHlGNfFSzWd6KysEn2toWjd+Drky7MwbDvJLfq/SvPIp7bJ/7KqnwLrgvTWC3R/BRYt5TI8P9fY1922XvuHvOPT4qTTZ3e8u7D+G2l7kP1bHt7tMVZbvdE+3eoyP+Y4xw7DvG6piPcXxDRjpMYOLtM9Ik5Zh9lBLZ5wjlmHLu49WUY275IL8lwKEWcbAVHB8iQNZw3E85Bv9RESXNrfBhyrccq9RxOIbnVJhjTKphjqHIV8MvLcKPhUtln2kCGcWf8xhwqtxaTKGWJfn1TCMndfkopZSpIX7QCMyI2vWQpcoeP0ZQoorBZlIds4RpS2NMxbXIXoAcgbwi+emAzktIoDMRb060xGKxjpCmqzknzcacqW4jTzXd6ggVTJYzaLoD9BJRbBAoiNZsNm9HCzm8pCOkcmpazO48PN4jJQGC8DB5r+K7B0sAtQVQ5xKgswi0+YqAzhhTsOrTKaT9mZ7jlUoDXv1AjVgBa9pDNdOarlmcpWsaQzVZtuAXBauoMF2zBPP+8C+EOQpb5AGiB3hA5iNFMktIxsfwUTFnTQn42FVgOBZgOC4wPLgOw8cFhodKYniYwCNBB1rgUbqWrT9B4LG3zdZJka2XN2Trxwl8Uqzi/lCuZOs5NgUnSszyEwSeVKo3LHR+ts5pOrPgqc3InSRwagvkPnVJun+awOmN6f7TVKw6iuTX0lQmTRTf4j5D4LNKGE/JbCrWpFI1ISfkNpm2JqdlOZlMtLFYbHpajlM
Wa07KckyFZzaPs88R+DzPBqS/gKQjQlwJXEgOH2N21WLxPQzRfVEEwJdEAHzZDwD/daj34Dg8i/7p1SXoYlGt3NPahl9x3GJ1quHoeCp4STyKfYUzvHLIx/acoP+VIv2vFuk/X6T/NZ/+lq1/PbD+DWH9hXXWXxTWXypp/WUC3wxeu5rglRIav0XgVaXu8hrXJ6DXNsN5lsC3t4Dz3GXeM14n8J21peS7m5v6HoHzK6a+z3txfc1e8qpQtrq4yri2+kvruHu2YmxHf3u/95r3jPuCe3qf+4r7mHvGPYnbGfdh9033vPs8fp/BTSzAint+r7tsHvBe9par3OPecrn7rPdGj/vg/X5O/oG/AP9wFN4QNN8UNC9cTPNH/xeacgmaPy7SdAVNr0jzJ0WaPy3S/NlV0fx5QPMtQfMX62j+UtD8VUmavybwG1GvRYbflqD5OwK//19o/mEzmn8k8Kct0PzzZWj+hcBf12j+bXOafyfwjxWa/xQ0Jy9L89QazZ4rpHlyVKoua8i5z+2J+gz/5TP89yj8BxkyJ/JfcC0dww== \ No newline at end of file diff --git a/tests/client/cassetes/test_items/test_items_list_iter.gz b/tests/client/cassetes/test_items/test_items_list_iter.gz new file mode 100644 index 00000000..df48fd73 --- /dev/null +++ b/tests/client/cassetes/test_items/test_items_list_iter.gz @@ -0,0 +1 @@ +eJy1lolfFOcZxxFNYkQTTJsak6ZZidAVmT1mgeUIjSiCCrJcwmjzxr7Mvuw77O7MPnOAkNLGlBpDSmOttU2alKRnjrYxqWmbtqntTI/0vu/jb+kz7yyisAY/+OnC7sx7zLzv+zzf3+99Hy2fgbKwcktZWdkEMy3N0GFd9zplM5Y13WYmVW2ss6CcwPrwDGwIupoMHGbZcNMM3BxWbsaaPLO5kYZblA1Y6EsNDsFGZT3eOqYGtyo78Y7bdqElGk0kIsF/Q7ylKdYUi9KCFjUdPTJu4dibxPOjRnoKKpQevC2YxjhT7bb6uByPN9VYBS3NzDZuSTaOLwXFGhyctlUn91bLsmVTm+G1OtG+Gy/4Yl3TM35FsgM2i8lzRvEhC7bMwG1hZRPWHLGYKbVnmG7D7QQqlbv8gadwQbpkqSZOUM9wZzQqR+RIHLZS5XZsb1dVVrCl/bpqpLEd7iDwLmULNmSmtUJdKM3GcjgTeDcV4Ql6w50E3iPCUhuthW1UDL7P0HUmogx3Edgu6rKMFSSa0yYY3B28YJ9hZDUG9xB4r9KNZdUyx2wjy3S5bb8+CqnJ3q7Duj49xLPJzqO9mVh+qr9nYAB4IZ9UjZSeGunuyw7Va6Y9PT3SPtKRhUy7MXIoy22WgnupclswERtDIPUwPWNzeB+B+5RyrE82QogKIhZ7DE0VGOwgUKXswFpaKOQ0lforiJ6QJicnpTHDzEuOmWN+cFga7qciMu0OhtTUpkVX2KnIWLeXWpoaOpa/3r+jjVDtOMpGAaFVQDQZ1MzA+wMK/ew7FiCou8KCJH98qO1+WyQ+zyyLZhjsFstKdUOdwxECKeja4acrQiCq3IulEZauC8mJUEq1Q3Is3hyKx1samlviyVDX4SGIBalTpCMFyzYZzUOcgKxsxbo0tbiUpxZq5ziSMxGHRIng1RNoUCqXBU8ooJGK2UxQcwqSBJquzH5zwMIgM1Gr0EKgVbxZz2j6iWg8EvfxfICK9y6OdpnPNgIfEG/2+YQHV7C3h0D7cvb2UuUmf465nDEJ+wh0iDT66q4LpfqGDqZ6B2E/dXjFDHQW42/6Y3Wpx/3Aq9amgIBNcEDpw+b73Cf9xOFnvfvycG3kYN0uadh9fcCb854a9s7243Uu5c31rDN
7Nrvzm7dI3kVvXvLecOfdc+Pr8DHvkndpfZnpvrQnXo1FOChGRaFpehwOnYLuAeixHGcGDoc5elNvmF9hSCl+q/LAkhFZtmEiDZGJfOQKkUdUIx/VbJa3onLwiSabo/KDSJZpt8Wgj1fwA8qhxcW45/e4F0i5d8l9ZRgn3o+LiXln+vsOHaw7uEH2TroX3ZMD0khDhzdXeRi/O5g7W8M2uOfKyyrd+Rbv8X5/Gf0c545qHfRxHFrhSUcIDL+zJ41Qjv6jlPCfo5Sj6Rzj2yhHd/kgvzvAQy3iwRbxeIgAWcLjYcpR/seFThqS8CHK16xW6jgcBToa5qhKNcxRjPyyANNCgCxcyn/GCGQUP+kx4FS5p2iiliX5/UwjJ7X7WEopU0P8QSMwLnrXQpYq232VIIsqys2kOvqEaUuDTMXdyJ6CHIG8IvmGQE9IiEBbIt6QaIzFYq0hTVdzTpoNOqMdRp5qutUaKpgsZ9B0K+gldGwQKIjRbHbCjhZy+EhrSOXUtJjddmSoU2oCCARi8i7FJx8sAagtAHWuAnQCAW24LkDHjVG4zKdTSPuZnuQVym58+pEqsQdWtYSqxjRdszhLV9WFqrJsyq8K9lHBdNUMnPDDPxXmSNg0DyB6hAfIfLiIzAwi48PwEZGz+gR89AZgeDSA4aSA4bFlMHxMwDBbEoaPEzgVTKARHqdLfn2awBPv5NeJeuHXcyv8+kkCnxD7uB/KRb+eZKMwXyLLnyTwlLJtxVbn+3VO05kFZ1ZD7lMEzq4BuU9fZfjnCHxmpeGfp2LfUSS/l6Yyabh4jvssgc8pYWyS2WisXqVqQk7IzTJNNo3JclNTopnFYmNjcpyyWEOTLMdUeHp1nT1D4PM8GyD9LCIdEcSVgAuRw2VMXGaxeBJD6J4TAviCEMCCLwD/QNS1fwieR/7pjRl0sapa7kw2449/E6tRDUfHpuCYeBznCi/wil4fti8K9L9URP/LRfS/oty5Itd5K1Ogaha+KkTxNV8Ua1bBi4EKXhIqeHmZCl4RKvh6SRV8g8A3Rb/GBLxagtMLBF4TZ+2Sc19uTK+vBu23CFxcA7RvXOME8m0C31naYr67OmtvEvjeImvf51247z501SGiLPuwe8592nvMvbCz2n1tJ+xyT8eUO9wF97lj7pveeW/WO+Odxu+Zvd6C9+yAt3DAW3Cf3+6+6M5udV/wZisH3Sd2lHnnjnqnvFc7fXP+gb8TvzUAPxSMXhKM/uhKRn/8f2FULsGoW2TUE4z+pMjoT4uM/owHJP78hkh8OyDxF4LEXy4j8VeCxF+XJPE3BH4bnA4S8LsSJP6ewB+un8Q/rkbinwj8eQ0k/uUaJP6VwN+WSPz76iT+g8A/F0n8lyCx5xokXlwi8dgqJG4s857pcd/yzt7vk/dvn7z/DMB/kTzmRP4HmBEbEw== \ No newline at end of file diff --git a/tests/client/cassetes/test_items/test_items_list_iter_with_start_and_count-json.gz b/tests/client/cassetes/test_items/test_items_list_iter_with_start_and_count-json.gz new file mode 100644 index 00000000..13ed2f72 --- /dev/null +++ b/tests/client/cassetes/test_items/test_items_list_iter_with_start_and_count-json.gz @@ -0,0 +1 @@ 
+eJy11glYFNcdAHBEYyJeSTUxZ7PSQDfK7DELyy5KFUUQQbkWmJC+mtnZBzOwO7v/ORYwoUlMYxVDU89W01hNY6ImTSPNUduq7Zse6X3f933f9/V9ffNmEIQlGPyysDszb4735r3f///effmDkOcVrszLy8tiTVfSKsyqnyUsoMeKamBNlAxapkM+gtneQZjjXKphMLFuwBWDMNcrzKUlKWzI6QRcKcyhB02NrTG4SphNd01NgXnCbXRPNoxMhd8fCvmc/7JgRSQQCfjFjOLXTNXXo9O6C9j98XRiAOYLDXQ3o6V7sGRUlgb5YDBSrGeUBNYqZZ0zaP2cc1hMKxcri8rXFfG8bogGptuiUNVKuqEPVhW12y4or4YFrPEyFulNOiwchEVeoYCWtOlY46q6sWrAYgRXC9fbFQ/QF1I5XdJoA9Vu2Yz7eR/vC8I1orCYnq+SJJwxuA2qlE7Q8/AaBEuEhfRE9zYlU+JJ4K4kbQksFVn3OFfDtQiuY92ywr8Cloms8vVpVcWsl+F6BDewsl6MM5yYVLIYbnQesD6d7lUw3ITgZqGeHku61mWke7HKV3Y2hZOdne1t9ansQE9fTa+8LYNbVInvLQvX1UGiYX0s0tazaZsc7oh3CPVY71hXG62KbcwE6uLJ/ng2C7eIwiKnIQbtAq4Bq92GDK9FcKuQT8vLw+ARmYjRK2IDGQzLERQKy2mpmMkkFUm038Dfz/X19XFdaS3FmVoS252DE/A6kfVMlUm7VFO2sUvhNoGnZetEXZE8nalL/bsjDEWmKVzFEOoZShND8SC83lFoj76pA4V6u5dJsuuHFfUvsYFPYV0XuzGsZK/VWA8lpkwRcF7hCvs1ksl0H/gQ+FljbcMlnsamWF3jllYIOGMlcG0Z3dCwmIIgAl64hpYlRF3mUqJOg2UrpZINQkhkdVfb41+KoEy4hR514ESJhw95GiXDwweCUU+QrwgEKkJlntrNMQhPslCOIDLRQlQUrh43DBfsVSBYxaq07cHqHINVieAN7N7xg8Uibo3T2KyoDcBaBFXjta1z7LVijeYGWI+gmj1Z7VbUfn/QF7TDYYNoyvMHocbtf81uT6201e54SS9wBBTARqGZnr6V7LYHjn5mk1PtK3x1Jbdz7WSkxRqyHm639jbT7VCjNdQwS2tYQIYXLOSs56xhznqeDJMDPbPobdZ56/zsvA3WA2R7oIgeQx2rlkaaogZh0w6ob4EG3TQHYbNXpslpi1cel5Ea5XnC6rFMpBtpjXLwZVO+cVHuk9Ipv2LglO7nnY+/POoPraG0NKMyAE3yfHmjsH30bazja8mzKN86j6xd0YK85UvnWHvu2bJ8TTiz1XoiTLaTXeQMOWQ9ZD1aWypbJ8iJAusI2WvtsY631ay0TqbIC+QsebKGnOtGZKT6OutxsvNma+eSgSZPATm6cWkV2eUl9/PNc68lj+WTU9WmtW8xORwku4sX0f6AZpm+c0sLtNqOY5OSWRuC9pdPZh2iTBOXkCNx3SHKNFt1ystEmaalO+UbHXuSqwqP2nsjAjRm702iTPPGVhZg0VK4S5RnHOaiaco0suNemYaz5JVpFMsXIjfBIhd7J0vvQtAt3EBLDdxv+DNJUVFXeSRZ1HRsVLbFargIyDnznYKgR7CRBaBXZE9opZol+lhNVGmm0QyuFUt0PjMGIIkgJXB2ShH7OWqoMhQsC4UDgcAqj6JKSTOBW814dTpFK9dXeTIaTqbFxCpQReEmdzLQdc6uX0snuSo78XCNmkLDCtIIMqwVKwCcyNLkWsEOGdAZbIPBNi+CnaWwyy4Jdk86Dhdcm5mEPdJ98nxhJb377kI2eRZWeAq7FFXRZZwoLPEU9uIBu8iZgFksFA5Cv939A16ZCtsmO4julh0y97hkBikZG8Ob2ZiVhuDey8Bwn4PhfoZh+wQMDzAMb/HmGtUHEexwGhCGt05K4jsR7GKzud0vo0m8D8dhSGS5XeDs3KdImGt3F0e7ETw
keOkpHscDpZIohfgQH+XF8kgXz0cioSgOBLq6+KCIA2URng9IMDxuNngbgodfbjYoDbDZ4O05MvgeBHuFZZOmWzuHJxUV67DvomS9H8GBycn64HSw34HgnTOAfWh62IcRPCK7pN9FSfuYuBy4KDn63OwFi+4SjqJ7lAXAERYA77YDwF5J1W6IwVHqX7y8xO4WFfE15VH6E7K/xVLaVI3KkLO+3ErbCsfk+VtsbI8x+u9x6T/u0j/u0n/Cpj9j60861k8w6ycnWD/FrD+V0/rTCN7LrouUwjM5CL0PwbNC8dSEJubJ01MsN0YQvH8s5T83nannEbwwA1MvTm/qAwjOjJr6oFxL5+X+i1YZeeSpdrqcaCYjvHWweVNdSV2MjOS3Wjs2VWyyTluHyElyYC15hs7Rx8ge+j1GHiTn6Hx9nP6etr8tXEeZQM7MI0PaEutpa2izdZT+NJP95JF4h1oQW5TXb53NErpAsRP0h+zZ+MMtcJY5Pcecnh/v9COvitNwDqcfdZ0S5tRynX7Mdfpx1+knLsvpJx2nLzGnn5rg9NPM6WdyOv0sgs+NOv18DqdfQPDFV+L0S1M4/TKCr4w5/ep0Tr+G4OszcPqN6Z1+E8G3Rp1+mzntm9LpvjGnXZfI9JDLNEqZ6jZTa9g6Zg1xTWT/nXd1qLMo07R1pJW86DD9js30uy3wPcb0+4zpD8Yz/eGrwjTqMg2OY/ojl+mPGdOfuEx/6jL9mcv055fF9BcO018ypr+awPTXjOlvcjL9LYLfsevCPPw+B9M/IPjjK2H6pymY/hnBX8aY/nU6pn9D8PcZMP3H9Ez/ieBfo0z/zZh2TsGUjFj7x5ySg5cI9XgLtyzP2kEON6z22xL/Y0v8bwv8j0rEpu//vClL+Q== \ No newline at end of file diff --git a/tests/client/cassetes/test_items/test_items_list_iter_with_start_and_count.gz b/tests/client/cassetes/test_items/test_items_list_iter_with_start_and_count.gz new file mode 100644 index 00000000..2ba3d904 --- /dev/null +++ b/tests/client/cassetes/test_items/test_items_list_iter_with_start_and_count.gz @@ -0,0 +1 @@ 
+eJy11glYFNcdAHC8A3gkRmOM2qxE6IrMHrOwsBhqQOQICMrlhORJH7MPZmB3dv9zcJjQGmujaGzUGJvLmKSJMTZtjU1sbBNt3/RI7/u+7/u+z69v3iyCsAQ//LKwOzNvjvfmvd///97OmUOQ4ZXmZWRk9BHdUBMazKibIc1nx6pmEh3LJiszYCaCWd4hmO1eqhOwiGHCnCGY65XmspI4MZVEFOZJs9nBlsbmFrhKmsV2LV2FTGkN21NMM1nq94dCPve/KFhaEigJ+HFS9euW5usxWN1Z/P7ORHQQsqV6tpvUEz1ENssKg2IwWJJnJNUo0csUQzBZ/YJ7mMcqx2W5xRW5omiY2CRsmxsqX8c27MGaqnU7BcWVMJ83XiGY3WTAgiFY6JWyWEmrQXShvJtoJixCcLV0vVPxIHshTTBknTVQ61asTr/oE31BuAZLi9j5clkmSVPYpMmJKDsPixFcKy1gJ7p3qMkCT5R0xVhLYAnm3eNeDUsRXMe7Jd+fD8swr3xjQtMI72W4HsFyXtZLSFLAMbWPwA3uAzYmEr0qgRUIVkp17Fg29C4z0Us0sax9SzjW3t7WWhfvG+zpr+pVdiRJkyaLvUXh2lqI1m9sKWntuXWHEt7WuU2qI8a2iupIeUtNMlDbGRvo7OuDVVha6DbEZF0g1BOt21TgDQhulGay8uIweDAXMXJFy2CSwGoEOdJqVoqTyZgqY+cN/ANCf3+/0JXQ44Klx4jTOSQKN2HeM+UW61Jd3cEvhTWSyMoqsKHKnvb45f7dFoZcy5Ku4giNJKNJIG8I3ugqdEbfMoBBXevlkpz6Ib/uVT7wcWIYuJvAOv5ajXVQYCkMgeCV5jivEYsl+sGHwM8b6xgu8DRuaaltbGiGgDtWktCaNEyd4DgEEYjSNawsig1FiGODBUsHo9IXhBDmdVc641+IoEhaxY62kWiBRwx5GmXTIwaCEU9QLA0ESwMBT/XmFghPsFCMoGS8hQiWrh4zDBftlSJYz6t07MHNaQarDMGb+L1jB4tH3Aa3sX1YH4RbEJSP1Vbh2msmOssNsBFBJX+y1q1qA/6gL+iEwyZsKdlDUJXqf91pT7Xc4XS8bGS5ArKgRtrKTt9I9zsDxz6z6Km2fF9twVqhjZ5psvfZ97XZh7ey7b5Ge1/9DL1+Pj0wf4Fgv2AfEOwXN9MjPTPYXfYF+8KsDHun/YB9oCiXFUAtr5ZFmqoF4dZ7oK4J6g3LGoLNXoUlpwavMiYjNSqZ0s2jmcgwEzrj4OuL+8ZEuU9OxP2qSeKGX3Q//uKIv3ADo6WbZQHYomQrNdKukbexz99CT6OZ9gVkD0eyMlYvmW0fuqth9YZwssM+Eaa76DA9Rx+y77WPVRcq9kl6Mst+jB62D9lPt1ats5+N07P0FfpMFT3fjeiZyuvsp+jelfbeawe3eLLo4zVLyumwl94tbp27lD45k56qtOz7F9GHg3R/3kLWIbBVYe/c1ATNjuOWCcmsFUHbayezbVhhiUtKk7huwwrLVu3KMqywtHS7coNrT06pIiP27kCARu1txwrLGx08wCKF8GasTDvMsWUpLLI7vQoLZ9mrsChWLkZulEcu8U6U3oWgW1rOSk0yYPqTMaxq6z2ygnWDmGWtLVVCCShp852KoEdylAWgF/MnNDPNMnusjjWWaXRTaCYym8/MQYghiEuCk1LwgMAMlYWCRaFwIBBY71E1OWZFSbPVWZmIs8qN9Z6kTmIJHF0PGpZWpCYDwxCc+vVETCh3Eo/QqKssrCCBIMlbkQ/gRpauVEtOyIDBYZsctnUJ7D4Gu+iyYPckOuGiaysZdUa6X8mW1rG778zhk2dOqSenS9VUQyHRnAJPTi8ZdIrcCZjHQs4QDDjdP+hVmLAdiovoTsUlc1eKzBAj42B4Cx+zwhC89Qow7HQx3M0x7BqH4W0cw25vulF9O4J73AaEYc+EJL4XwTCfzZ1+GUni/aQT9mGe2yXByX2qTIS21OJoP4J7JS8
7JZLOQKGM5ZAYEiMiLi7pEsWSklCEBAJdXWIQk0BRiSgGZDgwZjZ4B4L7XnM2KOSzwcE0GfwQgsPSsgnTrZPDY6pGDLj/kmR9BMEDE5P10algvxPBg9OA/dDUsB9G8IiSIv0oI+3j4tLgYuTYc/suWkwt4Ri6YzwAHuMBcNwJAGclVb2pBR5n/vGVJfZUUa5YVRxhP4XsG8qTE5ZmloXc9WUHays8oWQ3ONie5PTflaL/VIr+09LSCQMUN7qTWO6FEzwonnGCYtpRcNKNgmd5FJwaFwXv5lHwXNooeA+C97prORHelwbXaQTP80V62raPz59nJlmGvB/BC6NTwYtTWTuL4APTsPbS1NbOIfjgiLUPKdVsvu66ZPWR0budHmGz8i768ppc+vwaWEv3BrYvpsfpsXZ6zj5q77YP2nvZ92CFfdx+tMk+Xmkfpy8tpyfp7jn0CXv37GY6nFmTua6CDq+lJxY3Z4j2Hnq0QXXS9MvOnPxKE5znWi9wrR8eq/Ujr4vWcBqtNKXV5lo/mtL6sZTWjyuuyU9ckclXXZOf5CY/Nc7kp7nJz6Q1+VkEnxsx+fk0Jr+A4IuXb/JLk5j8MoKvjJr86lQmv4bg69Mw+Y2pTX4TwbdGTH6bm4xOYvLsqMnbpzL5HDe5yjG5kpmM1GTOYybnMJPZGStWdiznIr/jiPxuE3yPi/w+F/mDsSJ/+LqIjKREBseI/FFK5I+5yJ+kRP40JfJnKZE/vyKRv3BF/pKL/NU4kb/mIn+TVuRvEfyOX1cUgt+nEfkHBH+8fJF/mkTknxH8ZVTkX6cS+TcEf5+GyH9MLfKfCP41IvLfXGTdJCLPXRRJT98xBcnFGfICumfZTQ68/zjw/tsE/2PwiOX7P3+bLlM= \ No newline at end of file diff --git a/tests/client/cassetes/test_items/test_items_list_iter_with_start_and_count_2-json.gz b/tests/client/cassetes/test_items/test_items_list_iter_with_start_and_count_2-json.gz new file mode 100644 index 00000000..b1b8e3b7 --- /dev/null +++ b/tests/client/cassetes/test_items/test_items_list_iter_with_start_and_count_2-json.gz @@ -0,0 +1 @@ 
+eJy1l3lAFNcdxwGNiagxVhNzNisNZIPMHrMs7CJoUBQVFDnE0ebFDrMP3sjuzP7m4DCh1dRYxdhUU9OaNqmm0YhJ02hz1LbRdqaHve/7vu/7Sv/rb94sirAWq+3C7O783sx7b36/z+/7++22oiEoCEpXFxQU9FHDVHUNCpsKpZl4rmoWNWTFQpsJRQSmBIdgqn+pQcGmpgVXDcG0oDQNLRlqMT0FV0tT8WRdS3sHXCNNwa+2ocJ06Q78xiwrWxMOx2Ih/z8erUlEEpGwnFXDhq2Ftpi4djG/v0tPDcIMqRm/Zg19C1WsusqoGI0mysysmqJGHTMFC9cX/NMyXFyuK61eWiqKpiVbFD9LY/UL8QMn1lStxzNUN8BMvnlGZbzJhFlDcG1QKkbLepMaQn0P1SyYTeA66UZv4UF8IE0wFQM3qPUwuysshsRQFObI0mwcr1cUmrWE5Zqip3AcXkNgrjQLB3q2qtmKQIp2p3EnME/m7vGvhusJ3MDdUh4uh/kyX3yZrmmUexluJHATt/VSmhXktNpH4WZ/gmW63qtSuIXArVITnium0W3pvVQT6+rr41ubVrYvtZUoi62WVyYrs43ZjgY2kDL7U8n+1o2y0VJVmWqS9MTStuZkpCrdbiaUtVoqSeuXt0kNcJssXetvxEIXCM1U67EYvJbA7VIR2qurICBzIkav6BjMUlhAoERagFY5m02riuw9QXhA6O/vF7p1IyPYRpp6zqEpeJ3MPVNvo0sNdSu/FO6QRLQtlU1VCWzKXOrfxiootW3pGg6hmUU0KZQNwZ0+hV70bRMQ1LuCnCRvfShvOssDn6GmKfdQWMgfq6UJKmyGEAg+BJKwPmtaBpUzECIQluagLSWbTMjIJibCZsSgLwoRWbpujCfOhT9KQOQreuGHmB+0dmpgUkElgTj3n9ajagPhaCjqcVSVx6fVBBJ8/rE+5YmRlPnsDR5TNQQWSbfh2QaaqgiIsUCLYgXESDQZiIo1kWhNPBFoXNMBtbJ0lTdVOq33Qx2BxTwIXm5WBFrWdaxqWdsOSyYweDeB+vEMLvUX75ONQVhGoGEskctlm80YghU5/xueMxqVzZ7jFbPYJ6AYVkrrcPh2Z48XOHxNcY53lodWVdwldDon29xh9+FOd38rfg63uMPNhUbzTGfvzFmC+4K7V3BfXOMc2FKId7ln3DNTCtyXnTOzK0vxHFbxVTHRVC0Kq3dCUxs0m7Y9BGuCDLVpbZCNEaQWNl2qPS9EpqUbSEOoLxMak+QhRc+EVYtmzLDov8LVyXB8CZJlWHURWMdmsJXS9tGHSdQ7z5Mi9wxxdyeLCxbMm+ruu3/tgiVV2c3u0SrnAWe3c8o56D7kPt5YydxjzrFi9wlnv7vPPbJ+xUJ3JOO85LziPL3COd1DnJMNN7hPObtudXfNHVwXKHYOrZxX7+wOOtvF1mnXO08WOccbbPeR2c5jUWdP2bXoDmhl+MhtbdDuUdwxQcrWE+j8z1K2QWYoW1Ie2dooM9SqTWy+zFCUXs9u9rFXcrDSUezvIUDOY3+vzFA1NvP0SlbCG2R22Uku2zbDvO4KMkxmJcjK17xayBbyTTTiBvvlwUCHmqGCbluQ4mlMgxPzqZtAjzQdrRYdsMLMyqSB+Vq0TFYYFbyLDT0NKoEtXFM0XVC8EeidkBhpAhmeUEpaR9HRfO511ijdg8Zab/LFtV7lWlzLoovjkcrA+I3WhnGguIPRgMmFIZBSU9qdVsAXslRA1QIWXhoqrg378/AdLy6GLGcaONPGBUyb/wumLWSaM2QHGULTx3wu+plPwUCOgkGkgGHAtzI/zveNxvn+K4jzkB/nN/I4vwnj3HSWndPnbTyw24PSLbkSapqjIRPqPVkTWgwVNRUeIPBmyVOIctiRR1YfJLBTumkUg2xaVrVFAYXJhkmtuvUdK4QEvCVvDdxFYDefNwLDMp+hHRVOwWkNWcOgGZbQThXscaxB2EPgIUnwyow8IGAQ6mLReKw
qEokswsAqaTtF2+2uBj2Di5uLAlmDpnU5tQj2+hy9FTnyhBEe5rF+G4/1vgtivR9jHb+kWG/Ru+BcqO1sysvoR9gMaSHefV8Jb5FKagIl3aqmmoymSioCJb100DP5bRbHo2QI3u65/wCH4tEcFO/IQfHOHBQHPSgQhsd4zCpj8K4rgOHdPgyPcxieGAfDezgMhyaH4TCBJ9kOeXxRfy+Bp3jn5nlntKj30y44IvNaLwleuVYVKnTmGuGjBJ6Wgjgk0q5IJepCTIyJSVGuTnSLYiIRS9JIpLtbjMo0Ek+IYkSBYxfU/RECxyfW/WfyAPosgfdJ8yf0Ul7lT6saNeG5MeX//QSev3j5j9WIfvk/MRmwJwl84DKAfSFvprxI4CUfgSp42Uf6g4h0iBOXBy5EDmfsO8dirlFH6E7xBPgQT4APewng9cuNyzvgI8i/fGValzOViiuqk/gWxyNWpui2hkP+r4jNuFd4hc1Y68F2mqN/Jof+R3PofyyHvuOhf9msuz7rH+esf2Ic65/krH9qctbPEvi0x3renvQzBD57vjh/Lg93nyfwBans4tyNV8kvTsbUlwh8+TKY+kpepr5K4Gv+L5BK+LrP1DdYI7ZfvRf0kgXOM53YNLY6J0X30dbVqypWdTgni9rdnatrVrsn3IPOiHPgbuc57MUOO/vwOOw86JzGvuwIvo/gcaJN2BCXnFPTnWFjrvusO7zG2eYOF85xdzuHsvd6mvxNr9H6Vht8m6P5HY7md8ei+b3/C5rxHJrRMWh+P4fmDziaP8yh+aMcmj/OofmTK0Lzpz6aP+No/nwcmr/gaP5ycjR/ReDXF0XzNwR+ex7N3+VB8/cE/vDfoPnHydD8E4E/Xwaaf8mL5l8J/I17pEqEv/to/oOjuemiaB44j6Zz9BLRHGkT5hc4R50Rd0ci7JH4T4/EV9vgX0gitUP/BnN/7IU= \ No newline at end of file diff --git a/tests/client/cassetes/test_items/test_items_list_iter_with_start_and_count_2.gz b/tests/client/cassetes/test_items/test_items_list_iter_with_start_and_count_2.gz new file mode 100644 index 00000000..e7dcaf37 --- /dev/null +++ b/tests/client/cassetes/test_items/test_items_list_iter_with_start_and_count_2.gz @@ -0,0 +1 @@ 
+eJy1lolfFOcZxwGPRPDIoTHmaFYUuiKzxywsuxBqFxFQEJAro+0rfZl94R3Ynd1nDg4TmoTaFDQ2amzapFGSNIfaNk1sksY0ie07PdL7vo+/pe+8swrCJvjBTxd2Z97nnXmP5/n+nud9rGgSCvzKLQUFBaPEMLWMDoWthcpa3tZ0ixhYtbjNhCIEK/yTsNJ71CBgE9OCVZOw2q+s5pY0sWgmCbcoK3mjs6O7B25VVvBb29BgjbKd31HLytYGg5FIwPuvDtfGQrFQEGe1oGHrgWGTz10s3h/IJCegRGnjt1kjM0xUq74qLIfDsXIzqyWJUU9NyeLzS16znE+O68tqGspk2bSwRfi1LJLYyS98YF3Th1xDTSOsFYunBPOXTFg3Cev9SjG39JrEkBJDRLdgA4LblLvdiSf4hnTJVA2+QH2I2gNBOSAHwnA7Vjbw/oSqkqwl7dHVTJL3wx0I7lTW8Y6hI1q20pckgym+EtiIhXu8p2ETgruEWyqCFbAZi8l3Z3SdCC/D3Qi2CNsIIVkJp7RRAvd4A+zOZEY0AvciuE9p5W3VNAatzAjR5fpEovpIa0t3g62GaWQfbolXZZuzPY10PGmOJeNjBw5ioyNalWxVMrGGrrZ4KJrqNmNqu56Mk8SeLqUR7sfKem8hFneB1Eb0IYvCpxA8oBRxe00UfFgQcfWJnoksga0ISpWt3Iqz2ZSmYncHwXFpbGxMGswYack2UsR1DknCNiw8k7C5Sw3tiHgUtisytzVgU1N9h9I3+ncwCmW2rdwqIDSzHE0C5ZPwaY9CN/q2CRzUHX5Bkjs/VLR+JAKfJqaJhwjsFNvqaIVKm3IIJA8CRerNmpZBcBoCCILK7dyWxCaV0tjkQujnGIyGIYSV2+Z54lr4wwhkMaMbfoh4QesmBhcVVCGoFv7ThzR9PBgOhF2Oonl8WoMgJsaf71MhjDgWoze6TNUiqFPu562HSLLSJ0d8Harlk0PhuC8s14YitVVhX/P+HngQK6vcoVKpzBjUI/iMCIKrzUpfR2fP3o72bti1iMHPIkgsZLDBm3wUGxOwG0HjfCL3YJuWTEJTzv+G64xmtd91vGoWewQUQ4vSybsfYMfdwPHPCnaxryKwt3KH1McudTnHnKf6nNMH+PVYh3OsrdBoW8tOrF0nOW85JyTnbecEOzNcyF9zrjhXVhSw1wv3Rst4E/aKWbnQND0M+56A1i5oM217Evb7Kc9N7X46LyF10DXKg3OJyLQyBqchMJoOzBN5QM2kg5pF0mZQ9j7BmngwuouTZVj1IeikJbRFmbq6GXYuwd5ARc4V5MzEiwu2blzpnHqkfeuuaLbfeTXKptgMu8yedZ50zjZXUec8O1/snGOnnVPOK71NO50LafYO+4C91sQ+HELsUuNdzsts+j5n+s6JTl8xe6FlY4LN+Nnj8oHVm9hLRexio+08vYE9F2bHy9dzd8AByvfc1QXdLsY9i3JZL4K+T85lD2HK85aSJ28dxJQnq0N0M6Y8K32O3uNxr+ZoJVe5/zwCNMf9YUx52ugX+opXwRcwXbbKsW1TLuwBP+VqVv2Ui5heE25SCJf4lXtzudU0JVdIRiYlJVzepQ5D42KDQQRDiktOBdA8etMQDCtbuNUi41Ywm8KaXudTKTZMYtX39jRJMRjJmxxTCNJi3BDoWIzQzdFX+bAG1nlaMiypm6i8+FkTkEGQVSQ3/+BxiRNXHwlXR6KhUKjOp+lqyk6SbnugMZPmk5t1vqxBUhmcrAPwhGXQZsVVDJgCbEuAbV8H9igHu/qGwB7ODMA1ru1s0o30GC1RdvK3Hy4VtbO01lc6qOmaSUmytNJXOkImXJNXf4UWSidh3HX/hJ9ywo5QD6KHqYfMIzlkJjkyLgxfFDGrisCjNwHDYx4MjwsYphbA8CUBw9GlYfgygicoxQuz/VcQTIuS7nrnarYfIwMwg0URUCQ3j2sqkfpyJ6RjCI4rft4lk4FQlYrViByR4zKuiQ3Kciw
WiZNQaHBQDmMSqo7JckiFJ68rCCcQfHVxQXgqD6AnEZxSNi8qsm5JSGk6MeH0vLrwNIIzn1gXqkRd+NpSwD6D4OvLAPYbeZXyLILnPASi8E0P6ec50gFBXB64OHJ8xNFrLOZOcBy6s0IA54QAZl0BuAep5j098ALnH99cYs+ZyuSmmjj/ifJvpFzN2Drv8o6X/Xyt8CItaXdhe0mg/60c+i/n0H9F2bQoUmlzKIvVEXhViOI1VxTLVsF5TwUXhAouLlDBt4UKvrO0Cr6L4HVXBXmPMd9D8MZcOn8zD5GXEHxfnOfz7nNh9nxrKdbeRvDOMlj7QV7W3kVwWfgjGoH3PNZ+SJt5vT503eGjYOQwO8Or8hR7f3sZe3M77GDTocN3sFl29hC77DzjHHVOOtP8e7LBmXWe73Jm25xZ9u4Wdp4dXcVedI6u7GYzawrYhdr4qiY3Nb/v1uEPuuBDQegVQeiP5hP64/8LodU5QsPzCGU5Qh1B6E9yhP40R+jPqMfhz2+Kw488Dn8hOPzlAg5/JTj89dIc/gbBbz+Ww98h+P0ch3/Iw+EfEfzpxjn881Ic/gXBX5fB4d/ycvh3BP8Q/qiOwD89Dv8lONz3MRy+N8chWoLDjQWF+7exqW0ud/92uftPF/yXc0fswP8AxSAxgQ== \ No newline at end of file diff --git a/tests/client/test_items.py b/tests/client/test_items.py index ea19d1bc..79f68c33 100644 --- a/tests/client/test_items.py +++ b/tests/client/test_items.py @@ -1,9 +1,11 @@ import pytest from six.moves import range +from .utils import normalize_job_for_tests -def _add_test_items(job): - for i in range(3): + +def _add_test_items(job, size=3): + for i in range(size): job.items.write({'id': i, 'data': 'data' + str(i)}) job.items.flush() job.items.close() @@ -28,6 +30,7 @@ def test_items_iter(spider, json_and_msgpack): def test_items_list(spider, json_and_msgpack): job = spider.jobs.run(meta={'state': 'running'}) + job = normalize_job_for_tests(job) _add_test_items(job) o = job.items.list() @@ -36,3 +39,67 @@ def test_items_list(spider, json_and_msgpack): assert o[0] == {'id': 0, 'data': 'data0'} assert o[1] == {'id': 1, 'data': 'data1'} assert o[2] == {'id': 2, 'data': 'data2'} + + +def test_items_list_iter(spider, json_and_msgpack): + job = spider.jobs.run(meta={'state': 'running'}) + job = normalize_job_for_tests(job) + _add_test_items(job) + job.finish() + + o = job.items.list_iter(chunksize=2) + assert next(o) == [ + {'id': 0, 'data': 'data0'}, + {'id': 1, 'data': 'data1'}, + ] + assert next(o) == [ + {'id': 2, 'data': 'data2'}, + ] + with pytest.raises(StopIteration): + 
next(o) + + +def test_items_list_iter_with_start_and_count(spider, json_and_msgpack): + job = spider.jobs.run(meta={'state': 'running'}) + job = normalize_job_for_tests(job) + _add_test_items(job, size=10) + job.finish() + + o = job.items.list_iter(chunksize=3, start=3, count=7) + assert next(o) == [ + {'id': 3, 'data': 'data3'}, + {'id': 4, 'data': 'data4'}, + {'id': 5, 'data': 'data5'}, + ] + assert next(o) == [ + {'id': 6, 'data': 'data6'}, + {'id': 7, 'data': 'data7'}, + {'id': 8, 'data': 'data8'}, + ] + assert next(o) == [ + {'id': 9, 'data': 'data9'}, + ] + with pytest.raises(StopIteration): + next(o) + + +def test_items_list_iter_with_start_and_count_2(spider, json_and_msgpack): + """2nd version from the test above but this case makes sure that the total + number of items returned would be equal to `count`. + """ + + job = spider.jobs.run(meta={'state': 'running'}) + job = normalize_job_for_tests(job) + _add_test_items(job, size=10) + job.finish() + + o = job.items.list_iter(chunksize=2, start=3, count=3) + assert next(o) == [ + {'id': 3, 'data': 'data3'}, + {'id': 4, 'data': 'data4'}, + ] + assert next(o) == [ + {'id': 5, 'data': 'data5'}, + ] + with pytest.raises(StopIteration): + next(o) diff --git a/tests/client/utils.py b/tests/client/utils.py index 8e60cb39..4e490de0 100644 --- a/tests/client/utils.py +++ b/tests/client/utils.py @@ -13,3 +13,30 @@ def validate_default_meta(meta, state='pending', units=1, assert meta.get('units') == units assert meta.get('api_url') == TEST_DASH_ENDPOINT assert meta.get('portia_url') + + +def normalize_job_for_tests(job): + """A temporary workaround to deal with VCR.py cassettes(snapshots). + + The existing tests highly rely on VCR.py which creates snapshots of real + HTTP requests and responses, and during the test process tries to match + requests with the snapshots. 
Sometimes it's hard to run an appropriate test + environment locally, so we allow to use our servers to create snapshots + for new tests, by "normalizing" the snapshots via patching hosts/credentials + on-the-fly before saving it (see #112). + + The problem here is that we patch only requests data and not responses data, + which is pretty difficult to unify over the whole client. It means that if + some test gets data from API (say, a new job ID) and uses it to form another + requests (get the job data), it will form the HTTP requests differently, + thus it won't match with the snapshots during the test process and the tests + will fail. + + As a temporary workaround, the helper gets a Job instance, extracts its key, + replaces the project ID part with TEST_PROJECT_ID, and returns a new Job. + So, the other requests done via the new job instance (updating job items, + accessing job logs, etc) will be done using proper URLs matching with + existing snapshots. + """ + normalized_key = '{}/{}'.format(TEST_PROJECT_ID, job.key.split('/', 1)[1]) + return job._client.get_job(normalized_key) \ No newline at end of file