From 01ee89f9407c037b3543db7b5b7152d2210a6e81 Mon Sep 17 00:00:00 2001 From: Howard Butler Date: Fri, 15 Nov 2024 13:08:56 -0600 Subject: [PATCH] Support limiting dimensions with execute() and executeStreaming() --- src/pdal/PyArray.cpp | 5 +++-- src/pdal/PyArray.hpp | 3 ++- src/pdal/PyPipeline.cpp | 15 +++++++++++++-- src/pdal/PyPipeline.hpp | 4 ++-- src/pdal/StreamableExecutor.cpp | 8 +++++++- src/pdal/StreamableExecutor.hpp | 3 ++- src/pdal/libpdalpython.cpp | 25 ++++++++++++------------- test/test_pipeline.py | 23 +++++++++++++++++++++++ 8 files changed, 64 insertions(+), 22 deletions(-) diff --git a/src/pdal/PyArray.cpp b/src/pdal/PyArray.cpp index 55f87b5d..0dc875d9 100644 --- a/src/pdal/PyArray.cpp +++ b/src/pdal/PyArray.cpp @@ -78,7 +78,7 @@ Dimension::Type pdalType(int t) return Type::None; } -std::string toString(PyObject *pname) +std::string pyObjectToString(PyObject *pname) { PyObject* r = PyObject_Str(pname); if (!r) @@ -92,6 +92,7 @@ std::string toString(PyObject *pname) #if NPY_ABI_VERSION < 0x02000000 #define PyDataType_FIELDS(descr) ((descr)->fields) + #define PyDataType_NAMES(descr) ((descr)->names) #endif Array::Array(PyArrayObject* array) : m_array(array), m_rowMajor(true) @@ -124,7 +125,7 @@ Array::Array(PyArrayObject* array) : m_array(array), m_rowMajor(true) for (int i = 0; i < numFields; ++i) { - std::string name = toString(PyList_GetItem(names, i)); + std::string name = python::pyObjectToString(PyList_GetItem(names, i)); if (name == "X") xyz |= 1; else if (name == "Y") diff --git a/src/pdal/PyArray.hpp b/src/pdal/PyArray.hpp index e2da961f..d2fb9674 100644 --- a/src/pdal/PyArray.hpp +++ b/src/pdal/PyArray.hpp @@ -55,6 +55,7 @@ namespace pdal namespace python { + class ArrayIter; @@ -87,7 +88,7 @@ class PDAL_DLL Array }; -class ArrayIter +class PDAL_DLL ArrayIter { public: ArrayIter(const ArrayIter&) = delete; diff --git a/src/pdal/PyPipeline.cpp b/src/pdal/PyPipeline.cpp index 85ea028a..b64ef3bb 100644 --- a/src/pdal/PyPipeline.cpp +++ b/src/pdal/PyPipeline.cpp @@ -73,8 +73,13 @@ PipelineExecutor::PipelineExecutor( } -point_count_t PipelineExecutor::execute() +point_count_t PipelineExecutor::execute(pdal::StringList allowedDims) { + if (allowedDims.size()) + { + m_manager.pointTable().layout()->setAllowedDims(allowedDims); + } + point_count_t count = m_manager.execute(); m_executed = true; return count; @@ -92,9 +97,14 @@ std::string PipelineExecutor::getSrsWKT2() const return output; } -point_count_t PipelineExecutor::executeStream(point_count_t streamLimit) +point_count_t PipelineExecutor::executeStream(point_count_t streamLimit, + pdal::StringList allowedDims) { CountPointTable table(streamLimit); + if (allowedDims.size()) + { + pointTable().layout()->setAllowedDims(allowedDims); + } m_manager.executeStream(table); m_executed = true; return table.count(); @@ -272,6 +282,7 @@ PyObject* buildNumpyDescriptor(PointLayoutPtr layout) { return layout->dimOffset(id1) < layout->dimOffset(id2); }; + auto dims = layout->dims(); std::sort(dims.begin(), dims.end(), sortByOffset); diff --git a/src/pdal/PyPipeline.hpp b/src/pdal/PyPipeline.hpp index c32abcfe..5233763f 100644 --- a/src/pdal/PyPipeline.hpp +++ b/src/pdal/PyPipeline.hpp @@ -60,8 +60,8 @@ class PDAL_DLL PipelineExecutor { PipelineExecutor(std::string const& json, std::vector> arrays, int level); virtual ~PipelineExecutor() = default; - point_count_t execute(); - point_count_t executeStream(point_count_t streamLimit); + point_count_t execute(pdal::StringList allowedDims); + point_count_t executeStream(point_count_t streamLimit, pdal::StringList allowedDims); const PointViewSet& views() const; std::string getPipeline() const; diff --git a/src/pdal/StreamableExecutor.cpp b/src/pdal/StreamableExecutor.cpp index 9f5b4b8b..5fa01931 100644 --- a/src/pdal/StreamableExecutor.cpp +++ b/src/pdal/StreamableExecutor.cpp @@ -187,11 +187,17 @@ StreamableExecutor::StreamableExecutor(std::string const& json, std::vector> arrays, int level, point_count_t chunkSize, - int prefetch) + int prefetch, + pdal::StringList allowedDims) : PipelineExecutor(json, arrays, level) , m_table(chunkSize, prefetch) , m_exc(nullptr) { + + if (allowedDims.size()) + { + m_table.layout()->setAllowedDims(allowedDims); + } m_thread.reset(new std::thread([this]() { try { diff --git a/src/pdal/StreamableExecutor.hpp b/src/pdal/StreamableExecutor.hpp index 45d015c3..f565c8ee 100644 --- a/src/pdal/StreamableExecutor.hpp +++ b/src/pdal/StreamableExecutor.hpp @@ -81,7 +81,8 @@ class StreamableExecutor : public PipelineExecutor std::vector> arrays, int level, point_count_t chunkSize, - int prefetch); + int prefetch, + pdal::StringList allowedDim); ~StreamableExecutor(); MetadataNode getMetadata() { return m_table.metadata(); } diff --git a/src/pdal/libpdalpython.cpp b/src/pdal/libpdalpython.cpp index 229f928d..e5fc353a 100644 --- a/src/pdal/libpdalpython.cpp +++ b/src/pdal/libpdalpython.cpp @@ -165,28 +165,27 @@ namespace pdal { class Pipeline { public: - point_count_t execute() { + point_count_t execute(pdal::StringList allowedDims) { point_count_t response(0); { py::gil_scoped_release release; - response = getExecutor()->execute(); + response = getExecutor()->execute(allowedDims); } return response; - } - point_count_t executeStream(point_count_t streamLimit) { + point_count_t executeStream(point_count_t streamLimit, pdal::StringList allowedDims) { point_count_t response(0); { py::gil_scoped_release release; - response = getExecutor()->executeStream(streamLimit); + response = getExecutor()->executeStream(streamLimit, allowedDims); } return response; } - std::unique_ptr iterator(int chunk_size, int prefetch) { + std::unique_ptr iterator(int chunk_size, int prefetch, pdal::StringList allowedDims) { return std::unique_ptr(new PipelineIterator( - getJson(), _inputs, _loglevel, chunk_size, prefetch + getJson(), _inputs, _loglevel, chunk_size, prefetch, allowedDims )); } @@ -308,9 +307,9 @@ namespace pdal { py::class_(m, "Pipeline") .def(py::init<>()) - .def("execute", &Pipeline::execute) - .def("execute_streaming", &Pipeline::executeStream, "chunk_size"_a=10000) - .def("iterator", &Pipeline::iterator, "chunk_size"_a=10000, "prefetch"_a=0) + .def("execute", &Pipeline::execute, py::arg("allowed_dims") =py::list()) + .def("execute_streaming", &Pipeline::executeStream, "chunk_size"_a=10000, py::arg("allowed_dims") =py::list()) + .def("iterator", &Pipeline::iterator, "chunk_size"_a=10000, "prefetch"_a=0, py::arg("allowed_dims") =py::list()) .def_property("inputs", nullptr, &Pipeline::setInputs) .def_property("loglevel", &Pipeline::getLoglevel, &Pipeline::setLogLevel) .def_property_readonly("log", &Pipeline::getLog) @@ -333,10 +332,10 @@ namespace pdal { m.def("infer_writer_driver", &getWriterDriver); if (pdal::Config::versionMajor() < 2) - throw pybind11::import_error("PDAL version must be >= 2.6"); + throw pybind11::import_error("PDAL version must be >= 2.7"); - if (pdal::Config::versionMajor() == 2 && pdal::Config::versionMinor() < 6) - throw pybind11::import_error("PDAL version must be >= 2.6"); + if (pdal::Config::versionMajor() == 2 && pdal::Config::versionMinor() < 7) + throw pybind11::import_error("PDAL version must be >= 2.7"); }; }; // namespace pdal diff --git a/test/test_pipeline.py b/test/test_pipeline.py index c0c417a8..fcec914a 100644 --- a/test/test_pipeline.py +++ b/test/test_pipeline.py @@ -82,6 +82,17 @@ def test_execute_streaming(self, filename): count2 = r.execute_streaming(chunk_size=100) assert count == count2 + + @pytest.mark.parametrize("filename", ["range.json", "range.py"]) + def test_subsetstreaming(self, filename): + """Can we fetch a subset of PDAL dimensions as a numpy array while streaming""" + r = get_pipeline(filename) + limit = ['X','Y','Z','Intensity'] + arrays = list(r.iterator(chunk_size=100,allowed_dims=limit)) + assert len(arrays) == 11 + assert len(arrays[0].dtype) == 4 + + @pytest.mark.parametrize("filename", ["sort.json", "sort.py"]) def test_execute_streaming_non_streamable(self, filename): r = get_pipeline(filename) @@ -113,6 +124,18 @@ def test_array(self, filename): assert a[0][0] == 635619.85 assert a[1064][2] == 456.92 + @pytest.mark.parametrize("filename", ["sort.json", "sort.py"]) + def test_subsetarray(self, filename): + """Can we fetch a subset of PDAL dimensions as a numpy array""" + r = get_pipeline(filename) + limit = ['X','Y','Z'] + r.execute(allowed_dims=limit) + arrays = r.arrays + assert len(arrays) == 1 + assert len(arrays[0].dtype) == 3 + + + @pytest.mark.parametrize("filename", ["sort.json", "sort.py"]) def test_metadata(self, filename): """Can we fetch PDAL metadata"""