Merge dev to master for Open3D 0.17 release. (#586)
ssheorey authored Mar 3, 2023
2 parents 643e15c + f28224e commit 7196c76
Showing 28 changed files with 155 additions and 78 deletions.
19 changes: 12 additions & 7 deletions README.md
@@ -46,7 +46,9 @@ respective requirements files:
```bash
# To install a compatible version of TensorFlow
pip install -r requirements-tensorflow.txt
# To install a compatible version of PyTorch with CUDA
# To install a compatible version of PyTorch
pip install -r requirements-torch.txt
# To install a compatible version of PyTorch with CUDA on Linux
pip install -r requirements-torch-cuda.txt
```
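For a quick sanity check after installing either set of requirements, importing the framework-specific bindings is enough. This is an illustrative snippet (not from the commit), assuming the standard `open3d.ml` entry points:

```python
# Minimal post-install check: the import should succeed for whichever framework
# was installed above (PyTorch shown; use open3d.ml.tf for TensorFlow instead).
import open3d.ml.torch as ml3d  # requires open3d plus a compatible torch build
print(ml3d.__name__, "imported OK")
```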

@@ -338,15 +340,17 @@ The table shows the available models and datasets for the segmentation task and
For the task of object detection, we measure the performance of different methods using the mean average precision (mAP) for bird's eye view (BEV) and 3D.
The table shows the available models and datasets for the object detection task and the respective scores. Each score links to the respective weight file.
The models were evaluated on the KITTI validation subset according to KITTI's validation criteria. They were trained for three classes (car, pedestrian and cyclist), and the reported values are the mAP averaged over all classes and difficulty levels.
For the Waymo dataset, the models were trained on three classes (pedestrian, vehicle, cyclist).


| Model / Dataset | KITTI [BEV / 3D] @ 0.70|
|--------------------|---------------|
| PointPillars (tf) | [61.6 / 55.2](https://storage.googleapis.com/open3d-releases/model-zoo/pointpillars_kitti_202012221652utc.zip) |
| PointPillars (torch) | [61.2 / 52.8](https://storage.googleapis.com/open3d-releases/model-zoo/pointpillars_kitti_202012221652utc.pth) |
| PointRCNN (tf) | [78.2 / 65.9](https://storage.googleapis.com/open3d-releases/model-zoo/pointrcnn_kitti_202105071146utc.zip) |
| PointRCNN (torch) | [78.2 / 65.9](https://storage.googleapis.com/open3d-releases/model-zoo/pointrcnn_kitti_202105071146utc.pth) |
| Model / Dataset | KITTI [BEV / 3D] @ 0.70| Waymo (BEV / 3D) @ 0.50 |
|--------------------|------------------------|------------------|
| PointPillars (tf) | [61.6 / 55.2](https://storage.googleapis.com/open3d-releases/model-zoo/pointpillars_kitti_202012221652utc.zip) | - |
| PointPillars (torch) | [61.2 / 52.8](https://storage.googleapis.com/open3d-releases/model-zoo/pointpillars_kitti_202012221652utc.pth) | avg: 61.01 / 48.30 \| [best: 61.47 / 57.55](https://storage.googleapis.com/open3d-releases/model-zoo/pointpillars_waymo_202211200158utc_seed2_gpu16.pth) [^wpp-train] |
| PointRCNN (tf) | [78.2 / 65.9](https://storage.googleapis.com/open3d-releases/model-zoo/pointrcnn_kitti_202105071146utc.zip) | - |
| PointRCNN (torch) | [78.2 / 65.9](https://storage.googleapis.com/open3d-releases/model-zoo/pointrcnn_kitti_202105071146utc.pth) | - |

[^wpp-train]: The avg. metrics are the average over three sets of training runs with 4, 8, 16 and 32 GPUs. Training was halted after 30 epochs. The model checkpoint is available for the best training run.
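For reference, scores like the KITTI numbers above can be reproduced with the standard Open3D-ML pipeline API. The sketch below is illustrative (not from the commit); the config path, dataset path and checkpoint filename are placeholders to adjust for your setup:

```python
# Rough sketch: evaluate a PointPillars checkpoint from the table on the KITTI
# validation split. Paths and filenames are placeholders.
import open3d.ml as _ml3d
import open3d.ml.torch as ml3d

cfg = _ml3d.utils.Config.load_from_file("ml3d/configs/pointpillars_kitti.yml")
cfg.dataset["dataset_path"] = "/path/to/KITTI"            # placeholder

model = ml3d.models.PointPillars(**cfg.model)
dataset = ml3d.datasets.KITTI(**cfg.dataset)
pipeline = ml3d.pipelines.ObjectDetection(model, dataset=dataset, device="gpu",
                                          **cfg.pipeline)

# Weight file linked in the table above, downloaded beforehand.
pipeline.load_ckpt(ckpt_path="pointpillars_kitti_202012221652utc.pth")
pipeline.run_test()   # reports mAP (BEV / 3D) on the validation subset
```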

#### Training PointRCNN

@@ -402,6 +406,7 @@ For downloading these datasets visit the respective webpages and have a look at
* [Visualize custom data](docs/howtos.md#visualize-custom-data)
* [Adding a new model](docs/howtos.md#adding-a-new-model)
* [Adding a new dataset](docs/howtos.md#adding-a-new-dataset)
* [Distributed training](docs/howtos.md#distributed-training)
* [Visualize and compare input data, ground truth and results in TensorBoard](docs/tensorboard.md)
* [Inference with Intel OpenVINO](docs/openvino.md)

36 changes: 18 additions & 18 deletions ci/run_ci.sh
@@ -4,11 +4,11 @@
# - NPROC
#
TENSORFLOW_VER="2.8.2"
TORCH_GLNX_VER="1.12.0+cpu"
TORCH_GLNX_VER="1.13.1+cpu"
# OPENVINO_DEV_VER="2021.4.2" # Numpy version conflict with TF 2.8.2
PIP_VER="21.1.1"
WHEEL_VER="0.37.1"
STOOLS_VER="50.3.2"
WHEEL_VER="0.38.4"
STOOLS_VER="67.3.2"
YAPF_VER="0.30.0"
PYTEST_VER="7.1.2"
PYTEST_RANDOMLY_VER="3.8.0"
@@ -21,11 +21,11 @@ export PATH_TO_OPEN3D_ML=$(pwd)
# the build system of the main repo expects a master branch. make sure master exists
git checkout -b master || true
python -m pip install -U pip==$PIP_VER \
wheel=="$WHEEL_VER" \
setuptools=="$STOOLS_VER" \
yapf=="$YAPF_VER" \
pytest=="$PYTEST_VER" \
pytest-randomly=="$PYTEST_RANDOMLY_VER"

python -m pip install -r requirements.txt
echo $PATH_TO_OPEN3D_ML
@@ -38,23 +38,23 @@ git clone --recursive --branch master https://github.com/isl-org/Open3D.git

./Open3D/util/install_deps_ubuntu.sh assume-yes
python -m pip install -U tensorflow-cpu==$TENSORFLOW_VER \
torch==${TORCH_GLNX_VER} --extra-index-url https://download.pytorch.org/whl/cpu/
# openvino-dev=="$OPENVINO_DEV_VER"

echo 3. Configure for bundling the Open3D-ML part
echo
mkdir Open3D/build
pushd Open3D/build
cmake -DBUNDLE_OPEN3D_ML=ON \
-DOPEN3D_ML_ROOT=$PATH_TO_OPEN3D_ML \
-DGLIBCXX_USE_CXX11_ABI=OFF \
-DBUILD_TENSORFLOW_OPS=ON \
-DBUILD_PYTORCH_OPS=ON \
-DBUILD_GUI=ON \
-DBUILD_UNIT_TESTS=OFF \
-DBUILD_BENCHMARKS=OFF \
-DBUILD_EXAMPLES=OFF \
..

echo 4. Build and install wheel
echo
17 changes: 16 additions & 1 deletion docs/howtos.md
@@ -29,7 +29,7 @@ Second, we will construct the networks and pipelines, load the pretrained weight
from ml3d.torch.models import RandLANet, KPFCNN

kpconv_url = "https://storage.googleapis.com/open3d-releases/model-zoo/kpconv_semantickitti_202009090354utc.pth"
randlanet_url = "https://storage.googleapis.com/open3d-releases/model-zoo/randlanet_semantickitti_202009090354utc.pth"
randlanet_url = "https://storage.googleapis.com/open3d-releases/model-zoo/randlanet_semantickitti_202201071330utc.pth"

ckpt_path = "./logs/vis_weights_{}.pth".format('RandLANet')
if not exists(ckpt_path):
@@ -243,3 +243,18 @@ import open3d.ml.torch as ml3d
model = ml3d.models.MyModel()
dataset = ml3d.datasets.MyDataset()
```

## Distributed training (preview)

Open3D-ML currently supports distributed training with PyTorch for object detection on Waymo with the PointPillars model. More comprehensive support for semantic segmentation models will follow shortly.

Distributed training uses the PyTorch Distributed Data Parallel (DDP) module and can be used to distribute training across multiple compute nodes, each with multiple GPUs. Here is a chart of per-epoch runtime showing the speedup of sample runs with an increasing number of GPUs. The training was run on a cluster of 4 nodes with 8 RTX 3090 GPUs each.

- Dataset: Waymo v1.3
- Model: PointPillars
- GPU: RTX 3090
- Batch size: 4 per GPU

![PointPillars training on Waymo per epoch training time with number of GPUs](https://user-images.githubusercontent.com/41028320/220750523-57075575-8cc7-4e40-99b0-a4e79995f1ec.png)

See [`scripts/train_scripts/pointpillars_waymo.sh`](../scripts/train_scripts/pointpillars_waymo.sh) for an example SLURM training script for distributed training on two nodes, using four GPUs on each node. The remaining configuration is read from the config file [`pointpillars_waymo.yml`](../ml3d/configs/pointpillars_waymo.yml).
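The launch pattern behind that script is plain PyTorch DDP. The sketch below illustrates it with generic PyTorch calls and toy stand-ins for the model and dataset, not the actual Open3D-ML pipeline classes:

```python
# Illustrative DDP skeleton (toy model/dataset, not the Open3D-ML pipeline):
# each process owns one GPU, data is sharded by a DistributedSampler, and
# gradients are all-reduced across ranks on backward().
import os
import torch
import torch.distributed as dist
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.utils.data import DataLoader, DistributedSampler, TensorDataset


def main():
    dist.init_process_group(backend="nccl")          # RANK/WORLD_SIZE set by torchrun/SLURM
    local_rank = int(os.environ["LOCAL_RANK"])
    torch.cuda.set_device(local_rank)

    model = DDP(torch.nn.Linear(128, 3).cuda(local_rank), device_ids=[local_rank])
    dataset = TensorDataset(torch.randn(1024, 128), torch.randint(0, 3, (1024,)))
    sampler = DistributedSampler(dataset)            # disjoint shard per rank
    loader = DataLoader(dataset, batch_size=4, sampler=sampler)  # 4 per GPU, as above
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    loss_fn = torch.nn.CrossEntropyLoss()

    for epoch in range(30):                          # the runs above stopped at 30 epochs
        sampler.set_epoch(epoch)                     # reshuffle shards each epoch
        for x, y in loader:
            x, y = x.cuda(local_rank), y.cuda(local_rank)
            optimizer.zero_grad()
            loss_fn(model(x), y).backward()          # gradient all-reduce happens here
            optimizer.step()

    dist.destroy_process_group()


if __name__ == "__main__":
    main()
```

With SLURM or `torchrun`, each rank is launched as its own process so the `RANK`, `WORLD_SIZE` and `LOCAL_RANK` environment variables read above are populated automatically.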
4 changes: 2 additions & 2 deletions examples/vis_pred.py
@@ -77,7 +77,7 @@ def pred_custom_data(pc_names, pcs, pipeline_r, pipeline_k):

def get_torch_ckpts():
kpconv_url = "https://storage.googleapis.com/open3d-releases/model-zoo/kpconv_semantickitti_202009090354utc.pth"
randlanet_url = "https://storage.googleapis.com/open3d-releases/model-zoo/randlanet_semantickitti_202009090354utc.pth"
randlanet_url = "https://storage.googleapis.com/open3d-releases/model-zoo/randlanet_semantickitti_202201071330utc.pth"

ckpt_path_r = example_dir + "/vis_weights_{}.pth".format('RandLANet')
if not exists(ckpt_path_r):
@@ -95,7 +95,7 @@ def get_tf_ckpts():

def get_tf_ckpts():
kpconv_url = "https://storage.googleapis.com/open3d-releases/model-zoo/kpconv_semantickitti_202010021102utc.zip"
randlanet_url = "https://storage.googleapis.com/open3d-releases/model-zoo/randlanet_semantickitti_202010091306.zip"
randlanet_url = "https://storage.googleapis.com/open3d-releases/model-zoo/randlanet_semantickitti_202201071330utc.zip"

ckpt_path_dir = example_dir + "/vis_weights_{}".format('RandLANet')
if not exists(ckpt_path_dir):
2 changes: 1 addition & 1 deletion examples/visualize.py
@@ -164,7 +164,7 @@ def main():
path = ensure_demo_data()

kpconv_url = "https://storage.googleapis.com/open3d-releases/model-zoo/kpconv_semantickitti_202009090354utc.pth"
randlanet_url = "https://storage.googleapis.com/open3d-releases/model-zoo/randlanet_semantickitti_202009090354utc.pth"
randlanet_url = "https://storage.googleapis.com/open3d-releases/model-zoo/randlanet_semantickitti_202201071330utc.pth"
ckpt_path = "../dataset/checkpoints/vis_weights_{}.pth".format(
args.model)

2 changes: 1 addition & 1 deletion ml3d/tf/modules/pointnet.py
@@ -93,7 +93,7 @@ def __init__(self):
def call(self, xyz, features=None, new_xyz=None, training=True):
r"""
:param xyz: (B, N, 3) tensor of the xyz coordinates of the features
:param features: (B, N, C) tensor of the descriptors of the the features
:param features: (B, N, C) tensor of the descriptors of the features
:param new_xyz:
:return:
new_xyz: (B, npoint, 3) tensor of the new features' xyz
2 changes: 1 addition & 1 deletion ml3d/tf/utils/pointnet/pointnet2_modules.py
@@ -17,7 +17,7 @@ def __init__(self):
def call(self, xyz, features=None, new_xyz=None, training=True):
r"""
:param xyz: (B, N, 3) tensor of the xyz coordinates of the features
:param features: (B, N, C) tensor of the descriptors of the the features
:param features: (B, N, C) tensor of the descriptors of the features
:param new_xyz:
:return:
new_xyz: (B, npoint, 3) tensor of the new features' xyz
2 changes: 1 addition & 1 deletion ml3d/torch/modules/pointnet.py
@@ -122,7 +122,7 @@ def forward(self,
new_xyz=None) -> (torch.Tensor, torch.Tensor):
r"""
:param xyz: (B, N, 3) tensor of the xyz coordinates of the features
:param features: (B, N, C) tensor of the descriptors of the the features
:param features: (B, N, C) tensor of the descriptors of the features
:param new_xyz:
:return:
new_xyz: (B, npoint, 3) tensor of the new features' xyz
2 changes: 1 addition & 1 deletion ml3d/torch/utils/pointnet/pointnet2_modules.py
@@ -50,7 +50,7 @@ def forward(self,
r"""Forward.
:param xyz: (B, N, 3) tensor of the xyz coordinates of the features
:param features: (B, N, C) tensor of the descriptors of the the features
:param features: (B, N, C) tensor of the descriptors of the features
:param new_xyz:
:return:
new_xyz: (B, npoint, 3) tensor of the new features' xyz
8 changes: 4 additions & 4 deletions ml3d/utils/builder.py
@@ -14,19 +14,19 @@ def build_network(cfg):
return build(cfg, NETWORK)


def convert_device_name(framework, device_ids):
def convert_device_name(device_type, device_ids):
"""Convert device to either cpu or cuda."""
gpu_names = ["gpu", "cuda"]
cpu_names = ["cpu"]
if framework not in cpu_names + gpu_names:
if device_type not in cpu_names + gpu_names:
raise KeyError("the device should either "
"be cuda or cpu but got {}".format(framework))
"be cuda or cpu but got {}".format(device_type))
assert type(device_ids) is list
device_ids_new = []
for device in device_ids:
device_ids_new.append(int(device))

if framework in gpu_names:
if device_type in gpu_names:
return "cuda", device_ids_new
else:
return "cpu", device_ids_new
17 changes: 10 additions & 7 deletions model_zoo.md
@@ -5,13 +5,16 @@
For the task of semantic segmentation, we measure the performance of different methods using the mean intersection-over-union (mIoU) over all classes.
The table shows the available models and datasets for the segmentation task and the respective scores. Each score links to the respective weight file.


| Model / Dataset | SemanticKITTI | Toronto 3D | S3DIS | Semantic3D | Paris-Lille3D |
|--------------------|---------------|----------- |-------|--------------|-------------|
| RandLA-Net (tf) | [53.7](https://storage.googleapis.com/open3d-releases/model-zoo/randlanet_semantickitti_202010091306.zip) | [69.0](https://storage.googleapis.com/open3d-releases/model-zoo/randlanet_toronto3d_202010091250.zip) | [67.0](https://storage.googleapis.com/open3d-releases/model-zoo/randlanet_s3dis_202010091238.zip) | [76.0](https://storage.googleapis.com/open3d-releases/model-zoo/randlanet_semantic3d_202012120312utc.zip) | [70.0](https://storage.googleapis.com/open3d-releases/model-zoo/randlanet_parislille3d_202012160654utc.zip) |
| RandLA-Net (torch) | [52.8](https://storage.googleapis.com/open3d-releases/model-zoo/randlanet_semantickitti_202009090354utc.pth) | [71.2](https://storage.googleapis.com/open3d-releases/model-zoo/randlanet_toronto3D_202010091306.pth) | [67.0](https://storage.googleapis.com/open3d-releases/model-zoo/randlanet_s3dis_202010091238.pth) | [76.0](https://storage.googleapis.com/open3d-releases/model-zoo/randlanet_semantic3d_202012120312utc.pth) | [70.0](https://storage.googleapis.com/open3d-releases/model-zoo/randlanet_parislille3d_202012160654utc.pth) |
| KPConv (tf) | [58.7](https://storage.googleapis.com/open3d-releases/model-zoo/kpconv_semantickitti_202010021102utc.zip) | [65.6](https://storage.googleapis.com/open3d-releases/model-zoo/kpconv_toronto3d_202012221551utc.zip) | [65.0](https://storage.googleapis.com/open3d-releases/model-zoo/kpconv_s3dis_202010091238.zip) | - | [76.7](https://storage.googleapis.com/open3d-releases/model-zoo/kpconv_parislille3d_202011241550utc.zip) |
| KPConv (torch) | [58.0](https://storage.googleapis.com/open3d-releases/model-zoo/kpconv_semantickitti_202009090354utc.pth) | [65.6](https://storage.googleapis.com/open3d-releases/model-zoo/kpconv_toronto3d_202012221551utc.pth) | [60.0](https://storage.googleapis.com/open3d-releases/model-zoo/kpconv_s3dis_202010091238.pth) | - | [76.7](https://storage.googleapis.com/open3d-releases/model-zoo/kpconv_parislille3d_202011241550utc.pth) |
| Model / Dataset | SemanticKITTI | Toronto 3D | S3DIS | Semantic3D | Paris-Lille3D | ScanNet |
|--------------------|---------------|----------- |-------|--------------|-------------|---------|
| RandLA-Net (tf) | [53.7](https://storage.googleapis.com/open3d-releases/model-zoo/randlanet_semantickitti_202201071330utc.zip) | [73.7](https://storage.googleapis.com/open3d-releases/model-zoo/randlanet_toronto3d_202201071330utc.zip) | [70.9](https://storage.googleapis.com/open3d-releases/model-zoo/randlanet_s3dis_202201071330utc.zip) | [76.0](https://storage.googleapis.com/open3d-releases/model-zoo/randlanet_semantic3d_202201071330utc.zip) | [70.0](https://storage.googleapis.com/open3d-releases/model-zoo/randlanet_parislille3d_202201071330utc.zip)* | - |
| RandLA-Net (torch) | [52.8](https://storage.googleapis.com/open3d-releases/model-zoo/randlanet_semantickitti_202201071330utc.pth) | [74.0](https://storage.googleapis.com/open3d-releases/model-zoo/randlanet_toronto3d_202201071330utc.pth) | [70.9](https://storage.googleapis.com/open3d-releases/model-zoo/randlanet_s3dis_202201071330utc.pth) | [76.0](https://storage.googleapis.com/open3d-releases/model-zoo/randlanet_semantic3d_202201071330utc.pth) | [70.0](https://storage.googleapis.com/open3d-releases/model-zoo/randlanet_parislille3d_202201071330utc.pth)* | - |
| KPConv (tf) | [58.7](https://storage.googleapis.com/open3d-releases/model-zoo/kpconv_semantickitti_202010021102utc.zip) | [65.6](https://storage.googleapis.com/open3d-releases/model-zoo/kpconv_toronto3d_202012221551utc.zip) | [65.0](https://storage.googleapis.com/open3d-releases/model-zoo/kpconv_s3dis_202010091238.zip) | - | [76.7](https://storage.googleapis.com/open3d-releases/model-zoo/kpconv_parislille3d_202011241550utc.zip) | - |
| KPConv (torch) | [58.0](https://storage.googleapis.com/open3d-releases/model-zoo/kpconv_semantickitti_202009090354utc.pth) | [65.6](https://storage.googleapis.com/open3d-releases/model-zoo/kpconv_toronto3d_202012221551utc.pth) | [60.0](https://storage.googleapis.com/open3d-releases/model-zoo/kpconv_s3dis_202010091238.pth) | - | [76.7](https://storage.googleapis.com/open3d-releases/model-zoo/kpconv_parislille3d_202011241550utc.pth) | - |
| SparseConvUnet (torch)| - | - | - | - | - | [68](https://storage.googleapis.com/open3d-releases/model-zoo/sparseconvunet_scannet_202105031316utc.pth) |
| SparseConvUnet (tf)| - | - | - | - | - | [68.2](https://storage.googleapis.com/open3d-releases/model-zoo/sparseconvunet_scannet_202105031316utc.zip) |
| PointTransformer (torch)| - | - | [69.2](https://storage.googleapis.com/open3d-releases/model-zoo/pointtransformer_s3dis_202109241350utc.pth) | - | - | - |
| PointTransformer (tf)| - | - | [69.2](https://storage.googleapis.com/open3d-releases/model-zoo/pointtransformer_s3dis_202109241350utc.zip) | - | - | - |

[md5 checksum file](https://storage.googleapis.com/open3d-releases/model-zoo/integrity.txt)
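As a reference for consuming these weight files, here is a hedged sketch (not from the commit) assuming the standard Open3D-ML config/pipeline API; paths and the checkpoint name are placeholders:

```python
# Rough sketch: load a RandLA-Net SemanticKITTI checkpoint from the table and
# run inference on one frame. Paths and filenames are placeholders.
import open3d.ml as _ml3d
import open3d.ml.torch as ml3d

cfg = _ml3d.utils.Config.load_from_file("ml3d/configs/randlanet_semantickitti.yml")
cfg.dataset["dataset_path"] = "/path/to/SemanticKITTI"    # placeholder

model = ml3d.models.RandLANet(**cfg.model)
dataset = ml3d.datasets.SemanticKITTI(**cfg.dataset)
pipeline = ml3d.pipelines.SemanticSegmentation(model, dataset=dataset,
                                               device="gpu", **cfg.pipeline)

# Weight file from the table above; verify it against the md5 checksum file.
pipeline.load_ckpt(ckpt_path="randlanet_semantickitti_202201071330utc.pth")

data = dataset.get_split("test").get_data(0)   # a single point cloud
result = pipeline.run_inference(data)          # per-point predictions
```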

2 changes: 1 addition & 1 deletion requirements-tensorflow.txt
@@ -1 +1 @@
tensorflow~=2.8.2
tensorflow~=2.8.4
6 changes: 3 additions & 3 deletions requirements-torch-cuda.txt
@@ -1,5 +1,5 @@
-f https://download.pytorch.org/whl/torch/
torch==1.12.0+cu116
torch==1.13.1+cu116
-f https://download.pytorch.org/whl/torchvision/
torchvision==0.13.0+cu116
tensorboard~=2.8.0
torchvision==0.14.1+cu116
tensorboard
8 changes: 4 additions & 4 deletions requirements-torch.txt
@@ -1,6 +1,6 @@
--extra-index-url https://download.pytorch.org/whl/cpu/
torch==1.12.0+cpu ; sys_platform != 'darwin'
torchvision==0.13.0+cpu ; sys_platform != 'darwin'
torch==1.12.0 ; sys_platform == 'darwin'
torchvision==0.13.0 ; sys_platform == 'darwin'
torch==1.13.1+cpu ; sys_platform != 'darwin'
torchvision==0.14.1+cpu ; sys_platform != 'darwin'
torch==1.13.1 ; sys_platform == 'darwin'
torchvision==0.14.1 ; sys_platform == 'darwin'
tensorboard
2 changes: 1 addition & 1 deletion requirements.txt
@@ -1,5 +1,5 @@
addict
pillow>=8.2.0
pillow>=9.3.0
matplotlib>=3
numpy>1.15
pandas>=1.0