Skip to content

Commit

Permalink
Support limiting dimensions with execute() and executeStreaming()
Browse files Browse the repository at this point in the history
  • Loading branch information
hobu committed Nov 15, 2024
1 parent f2fdc7c commit 01ee89f
Show file tree
Hide file tree
Showing 8 changed files with 64 additions and 22 deletions.
5 changes: 3 additions & 2 deletions src/pdal/PyArray.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ Dimension::Type pdalType(int t)
return Type::None;
}

std::string toString(PyObject *pname)
std::string pyObjectToString(PyObject *pname)
{
PyObject* r = PyObject_Str(pname);
if (!r)
Expand All @@ -92,6 +92,7 @@ std::string toString(PyObject *pname)

#if NPY_ABI_VERSION < 0x02000000
#define PyDataType_FIELDS(descr) ((descr)->fields)
#define PyDataType_NAMES(descr) ((descr)->names)
#endif

Array::Array(PyArrayObject* array) : m_array(array), m_rowMajor(true)
Expand Down Expand Up @@ -124,7 +125,7 @@ Array::Array(PyArrayObject* array) : m_array(array), m_rowMajor(true)

for (int i = 0; i < numFields; ++i)
{
std::string name = toString(PyList_GetItem(names, i));
std::string name = python::pyObjectToString(PyList_GetItem(names, i));
if (name == "X")
xyz |= 1;
else if (name == "Y")
Expand Down
3 changes: 2 additions & 1 deletion src/pdal/PyArray.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ namespace pdal
namespace python
{


class ArrayIter;


Expand Down Expand Up @@ -87,7 +88,7 @@ class PDAL_DLL Array
};


class ArrayIter
class PDAL_DLL ArrayIter
{
public:
ArrayIter(const ArrayIter&) = delete;
Expand Down
15 changes: 13 additions & 2 deletions src/pdal/PyPipeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,13 @@ PipelineExecutor::PipelineExecutor(
}


point_count_t PipelineExecutor::execute()
point_count_t PipelineExecutor::execute(pdal::StringList allowedDims)
{
if (allowedDims.size())
{
m_manager.pointTable().layout()->setAllowedDims(allowedDims);
}

point_count_t count = m_manager.execute();
m_executed = true;
return count;
Expand All @@ -92,9 +97,14 @@ std::string PipelineExecutor::getSrsWKT2() const
return output;
}

point_count_t PipelineExecutor::executeStream(point_count_t streamLimit)
point_count_t PipelineExecutor::executeStream(point_count_t streamLimit,
pdal::StringList allowedDims)
{
CountPointTable table(streamLimit);
if (allowedDims.size())
{
pointTable().layout()->setAllowedDims(allowedDims);
}
m_manager.executeStream(table);
m_executed = true;
return table.count();
Expand Down Expand Up @@ -272,6 +282,7 @@ PyObject* buildNumpyDescriptor(PointLayoutPtr layout)
{
return layout->dimOffset(id1) < layout->dimOffset(id2);
};

auto dims = layout->dims();
std::sort(dims.begin(), dims.end(), sortByOffset);

Expand Down
4 changes: 2 additions & 2 deletions src/pdal/PyPipeline.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,8 @@ class PDAL_DLL PipelineExecutor {
PipelineExecutor(std::string const& json, std::vector<std::shared_ptr<Array>> arrays, int level);
virtual ~PipelineExecutor() = default;

point_count_t execute();
point_count_t executeStream(point_count_t streamLimit);
point_count_t execute(pdal::StringList allowedDims);
point_count_t executeStream(point_count_t streamLimit, pdal::StringList allowedDims);

const PointViewSet& views() const;
std::string getPipeline() const;
Expand Down
8 changes: 7 additions & 1 deletion src/pdal/StreamableExecutor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -187,11 +187,17 @@ StreamableExecutor::StreamableExecutor(std::string const& json,
std::vector<std::shared_ptr<Array>> arrays,
int level,
point_count_t chunkSize,
int prefetch)
int prefetch,
pdal::StringList allowedDims)
: PipelineExecutor(json, arrays, level)
, m_table(chunkSize, prefetch)
, m_exc(nullptr)
{

if (allowedDims.size())
{
m_table.layout()->setAllowedDims(allowedDims);
}
m_thread.reset(new std::thread([this]()
{
try {
Expand Down
3 changes: 2 additions & 1 deletion src/pdal/StreamableExecutor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,8 @@ class StreamableExecutor : public PipelineExecutor
std::vector<std::shared_ptr<Array>> arrays,
int level,
point_count_t chunkSize,
int prefetch);
int prefetch,
pdal::StringList allowedDims);
~StreamableExecutor();

MetadataNode getMetadata() { return m_table.metadata(); }
Expand Down
25 changes: 12 additions & 13 deletions src/pdal/libpdalpython.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -165,28 +165,27 @@ namespace pdal {

class Pipeline {
public:
// Execute the pipeline, optionally restricting the point layout to the
// named dimensions (an empty list means "all dimensions").
// The GIL is released while PDAL runs so other Python threads can proceed.
// Returns the number of points processed.
point_count_t execute(pdal::StringList allowedDims) {
    point_count_t response(0);
    {
        py::gil_scoped_release release;
        response = getExecutor()->execute(allowedDims);
    }
    return response;
}

// Execute the pipeline in streaming mode, processing at most streamLimit
// points per chunk and optionally restricting the layout to allowedDims
// (empty list means "all dimensions"). The GIL is released while PDAL runs.
// Returns the number of points processed.
point_count_t executeStream(point_count_t streamLimit, pdal::StringList allowedDims) {
    point_count_t response(0);
    {
        py::gil_scoped_release release;
        response = getExecutor()->executeStream(streamLimit, allowedDims);
    }
    return response;
}

std::unique_ptr<PipelineIterator> iterator(int chunk_size, int prefetch) {
std::unique_ptr<PipelineIterator> iterator(int chunk_size, int prefetch, pdal::StringList allowedDims) {
return std::unique_ptr<PipelineIterator>(new PipelineIterator(
getJson(), _inputs, _loglevel, chunk_size, prefetch
getJson(), _inputs, _loglevel, chunk_size, prefetch, allowedDims
));
}

Expand Down Expand Up @@ -308,9 +307,9 @@ namespace pdal {

py::class_<Pipeline>(m, "Pipeline")
.def(py::init<>())
.def("execute", &Pipeline::execute)
.def("execute_streaming", &Pipeline::executeStream, "chunk_size"_a=10000)
.def("iterator", &Pipeline::iterator, "chunk_size"_a=10000, "prefetch"_a=0)
.def("execute", &Pipeline::execute, py::arg("allowed_dims") =py::list())
.def("execute_streaming", &Pipeline::executeStream, "chunk_size"_a=10000, py::arg("allowed_dims") =py::list())
.def("iterator", &Pipeline::iterator, "chunk_size"_a=10000, "prefetch"_a=0, py::arg("allowed_dims") =py::list())
.def_property("inputs", nullptr, &Pipeline::setInputs)
.def_property("loglevel", &Pipeline::getLoglevel, &Pipeline::setLogLevel)
.def_property_readonly("log", &Pipeline::getLog)
Expand All @@ -333,10 +332,10 @@ namespace pdal {
m.def("infer_writer_driver", &getWriterDriver);

if (pdal::Config::versionMajor() < 2)
throw pybind11::import_error("PDAL version must be >= 2.6");
throw pybind11::import_error("PDAL version must be >= 2.7");

if (pdal::Config::versionMajor() == 2 && pdal::Config::versionMinor() < 6)
throw pybind11::import_error("PDAL version must be >= 2.6");
if (pdal::Config::versionMajor() == 2 && pdal::Config::versionMinor() < 7)
throw pybind11::import_error("PDAL version must be >= 2.7");
};

}; // namespace pdal
23 changes: 23 additions & 0 deletions test/test_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,17 @@ def test_execute_streaming(self, filename):
count2 = r.execute_streaming(chunk_size=100)
assert count == count2


@pytest.mark.parametrize("filename", ["range.json", "range.py"])
def test_subsetstreaming(self, filename):
    """Can we fetch a subset of PDAL dimensions as a numpy array while streaming"""
    r = get_pipeline(filename)
    # Ask the iterator to emit only these four dimensions.
    limit = ['X', 'Y', 'Z', 'Intensity']
    arrays = list(r.iterator(chunk_size=100, allowed_dims=limit))
    # Chunks of up to 100 points -> 11 arrays for this fixture.
    assert len(arrays) == 11
    # Each record should carry exactly the four requested fields.
    assert len(arrays[0].dtype) == 4


@pytest.mark.parametrize("filename", ["sort.json", "sort.py"])
def test_execute_streaming_non_streamable(self, filename):
r = get_pipeline(filename)
Expand Down Expand Up @@ -113,6 +124,18 @@ def test_array(self, filename):
assert a[0][0] == 635619.85
assert a[1064][2] == 456.92

@pytest.mark.parametrize("filename", ["sort.json", "sort.py"])
def test_subsetarray(self, filename):
    """Can we fetch a subset of PDAL dimensions as a numpy array"""
    r = get_pipeline(filename)
    # Restrict execution to the coordinate dimensions only.
    limit = ['X', 'Y', 'Z']
    r.execute(allowed_dims=limit)
    arrays = r.arrays
    assert len(arrays) == 1
    # The resulting structured dtype should hold exactly the three requested fields.
    assert len(arrays[0].dtype) == 3



@pytest.mark.parametrize("filename", ["sort.json", "sort.py"])
def test_metadata(self, filename):
"""Can we fetch PDAL metadata"""
Expand Down

0 comments on commit 01ee89f

Please sign in to comment.