Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support limiting dimensions with execute() and executeStreaming() #184

Merged
merged 1 commit into from
Nov 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions src/pdal/PyArray.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ Dimension::Type pdalType(int t)
return Type::None;
}

std::string toString(PyObject *pname)
std::string pyObjectToString(PyObject *pname)
{
PyObject* r = PyObject_Str(pname);
if (!r)
Expand All @@ -92,6 +92,7 @@ std::string toString(PyObject *pname)

#if NPY_ABI_VERSION < 0x02000000
#define PyDataType_FIELDS(descr) ((descr)->fields)
#define PyDataType_NAMES(descr) ((descr)->names)
#endif

Array::Array(PyArrayObject* array) : m_array(array), m_rowMajor(true)
Expand Down Expand Up @@ -124,7 +125,7 @@ Array::Array(PyArrayObject* array) : m_array(array), m_rowMajor(true)

for (int i = 0; i < numFields; ++i)
{
std::string name = toString(PyList_GetItem(names, i));
std::string name = python::pyObjectToString(PyList_GetItem(names, i));
if (name == "X")
xyz |= 1;
else if (name == "Y")
Expand Down
3 changes: 2 additions & 1 deletion src/pdal/PyArray.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ namespace pdal
namespace python
{


class ArrayIter;


Expand Down Expand Up @@ -87,7 +88,7 @@ class PDAL_DLL Array
};


class ArrayIter
class PDAL_DLL ArrayIter
{
public:
ArrayIter(const ArrayIter&) = delete;
Expand Down
15 changes: 13 additions & 2 deletions src/pdal/PyPipeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,13 @@ PipelineExecutor::PipelineExecutor(
}


point_count_t PipelineExecutor::execute()
point_count_t PipelineExecutor::execute(pdal::StringList allowedDims)
{
if (allowedDims.size())
{
m_manager.pointTable().layout()->setAllowedDims(allowedDims);
}

point_count_t count = m_manager.execute();
m_executed = true;
return count;
Expand All @@ -92,9 +97,14 @@ std::string PipelineExecutor::getSrsWKT2() const
return output;
}

point_count_t PipelineExecutor::executeStream(point_count_t streamLimit)
point_count_t PipelineExecutor::executeStream(point_count_t streamLimit,
pdal::StringList allowedDims)
{
CountPointTable table(streamLimit);
if (allowedDims.size())
{
pointTable().layout()->setAllowedDims(allowedDims);
}
m_manager.executeStream(table);
m_executed = true;
return table.count();
Expand Down Expand Up @@ -272,6 +282,7 @@ PyObject* buildNumpyDescriptor(PointLayoutPtr layout)
{
return layout->dimOffset(id1) < layout->dimOffset(id2);
};

auto dims = layout->dims();
std::sort(dims.begin(), dims.end(), sortByOffset);

Expand Down
4 changes: 2 additions & 2 deletions src/pdal/PyPipeline.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,8 @@ class PDAL_DLL PipelineExecutor {
PipelineExecutor(std::string const& json, std::vector<std::shared_ptr<Array>> arrays, int level);
virtual ~PipelineExecutor() = default;

point_count_t execute();
point_count_t executeStream(point_count_t streamLimit);
point_count_t execute(pdal::StringList allowedDims);
point_count_t executeStream(point_count_t streamLimit, pdal::StringList allowedDims);

const PointViewSet& views() const;
std::string getPipeline() const;
Expand Down
8 changes: 7 additions & 1 deletion src/pdal/StreamableExecutor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -187,11 +187,17 @@ StreamableExecutor::StreamableExecutor(std::string const& json,
std::vector<std::shared_ptr<Array>> arrays,
int level,
point_count_t chunkSize,
int prefetch)
int prefetch,
pdal::StringList allowedDims)
: PipelineExecutor(json, arrays, level)
, m_table(chunkSize, prefetch)
, m_exc(nullptr)
{

if (allowedDims.size())
{
m_table.layout()->setAllowedDims(allowedDims);
}
m_thread.reset(new std::thread([this]()
{
try {
Expand Down
3 changes: 2 additions & 1 deletion src/pdal/StreamableExecutor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,8 @@ class StreamableExecutor : public PipelineExecutor
std::vector<std::shared_ptr<Array>> arrays,
int level,
point_count_t chunkSize,
int prefetch);
int prefetch,
pdal::StringList allowedDim);
~StreamableExecutor();

MetadataNode getMetadata() { return m_table.metadata(); }
Expand Down
25 changes: 12 additions & 13 deletions src/pdal/libpdalpython.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -165,28 +165,27 @@ namespace pdal {

class Pipeline {
public:
point_count_t execute() {
point_count_t execute(pdal::StringList allowedDims) {
point_count_t response(0);
{
py::gil_scoped_release release;
response = getExecutor()->execute();
response = getExecutor()->execute(allowedDims);
}
return response;

}

point_count_t executeStream(point_count_t streamLimit) {
point_count_t executeStream(point_count_t streamLimit, pdal::StringList allowedDims) {
point_count_t response(0);
{
py::gil_scoped_release release;
response = getExecutor()->executeStream(streamLimit);
response = getExecutor()->executeStream(streamLimit, allowedDims);
}
return response;
}

std::unique_ptr<PipelineIterator> iterator(int chunk_size, int prefetch) {
std::unique_ptr<PipelineIterator> iterator(int chunk_size, int prefetch, pdal::StringList allowedDims) {
return std::unique_ptr<PipelineIterator>(new PipelineIterator(
getJson(), _inputs, _loglevel, chunk_size, prefetch
getJson(), _inputs, _loglevel, chunk_size, prefetch, allowedDims
));
}

Expand Down Expand Up @@ -308,9 +307,9 @@ namespace pdal {

py::class_<Pipeline>(m, "Pipeline")
.def(py::init<>())
.def("execute", &Pipeline::execute)
.def("execute_streaming", &Pipeline::executeStream, "chunk_size"_a=10000)
.def("iterator", &Pipeline::iterator, "chunk_size"_a=10000, "prefetch"_a=0)
.def("execute", &Pipeline::execute, py::arg("allowed_dims") =py::list())
.def("execute_streaming", &Pipeline::executeStream, "chunk_size"_a=10000, py::arg("allowed_dims") =py::list())
.def("iterator", &Pipeline::iterator, "chunk_size"_a=10000, "prefetch"_a=0, py::arg("allowed_dims") =py::list())
.def_property("inputs", nullptr, &Pipeline::setInputs)
.def_property("loglevel", &Pipeline::getLoglevel, &Pipeline::setLogLevel)
.def_property_readonly("log", &Pipeline::getLog)
Expand All @@ -333,10 +332,10 @@ namespace pdal {
m.def("infer_writer_driver", &getWriterDriver);

if (pdal::Config::versionMajor() < 2)
throw pybind11::import_error("PDAL version must be >= 2.6");
throw pybind11::import_error("PDAL version must be >= 2.7");

if (pdal::Config::versionMajor() == 2 && pdal::Config::versionMinor() < 6)
throw pybind11::import_error("PDAL version must be >= 2.6");
if (pdal::Config::versionMajor() == 2 && pdal::Config::versionMinor() < 7)
throw pybind11::import_error("PDAL version must be >= 2.7");
};

}; // namespace pdal
23 changes: 23 additions & 0 deletions test/test_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,17 @@ def test_execute_streaming(self, filename):
count2 = r.execute_streaming(chunk_size=100)
assert count == count2


@pytest.mark.parametrize("filename", ["range.json", "range.py"])
def test_subsetstreaming(self, filename):
"""Can we fetch a subset of PDAL dimensions as a numpy array while streaming"""
r = get_pipeline(filename)
limit = ['X','Y','Z','Intensity']
arrays = list(r.iterator(chunk_size=100,allowed_dims=limit))
assert len(arrays) == 11
assert len(arrays[0].dtype) == 4


@pytest.mark.parametrize("filename", ["sort.json", "sort.py"])
def test_execute_streaming_non_streamable(self, filename):
r = get_pipeline(filename)
Expand Down Expand Up @@ -113,6 +124,18 @@ def test_array(self, filename):
assert a[0][0] == 635619.85
assert a[1064][2] == 456.92

@pytest.mark.parametrize("filename", ["sort.json", "sort.py"])
def test_subsetarray(self, filename):
"""Can we fetch a subset of PDAL dimensions as a numpy array"""
r = get_pipeline(filename)
limit = ['X','Y','Z']
r.execute(allowed_dims=limit)
arrays = r.arrays
assert len(arrays) == 1
assert len(arrays[0].dtype) == 3



@pytest.mark.parametrize("filename", ["sort.json", "sort.py"])
def test_metadata(self, filename):
"""Can we fetch PDAL metadata"""
Expand Down
Loading