Source code for standard_e2e.caching.adapters.pano_adapter
from typing import Any
import albumentations as A
import cv2
import numpy as np
from standard_e2e.caching.adapters.abstract_adapter import AbstractAdapter
from standard_e2e.data_structures.frame_data import StandardFrameData
from standard_e2e.enums import CameraDirection, Modality, StandardFrameDataField
from standard_e2e.utils.image_utils import CropTop
[docs]
class PanoImageAdapter(AbstractAdapter):
    """Adapter that stitches several camera images into one panoramic image.

    The images of the configured cameras are placed side by side (in the
    configured order), their heights being equalised first when they differ,
    and the stitched image is then passed through the crop/resize pipeline
    built in ``__init__``. Frames that carry no cameras at all produce an
    empty result, so the adapter can be used with camera-less datasets too.
    """

    # Order in which camera images are concatenated along the width axis
    # when the caller does not supply ``cameras_order``.
    DEFAULT_CAMERAS_ORDER = [
        CameraDirection.FRONT_LEFT,
        CameraDirection.FRONT,
        CameraDirection.FRONT_RIGHT,
    ]

    def __init__(
        self,
        top_cut_frac: float = 0.0,
        max_size: int = 640,
        cameras_order: list[CameraDirection] | None = None,
    ):
        """Build the post-stitching transform and remember the camera order.

        Args:
            top_cut_frac: Forwarded to ``CropTop``; presumably the fraction of
                the image height removed from the top (see ``CropTop``).
            max_size: Forwarded to ``A.LongestMaxSize`` as the target size of
                the longest side of the stitched image.
            cameras_order: Cameras to stitch, in concatenation order. ``None``
                or an empty list selects ``DEFAULT_CAMERAS_ORDER``.
        """
        super().__init__()
        self._image_transform = A.Compose(
            [
                CropTop(top_cut_frac=top_cut_frac),
                A.LongestMaxSize(max_size=max_size, p=1.0),
            ]
        )
        # Store a private copy: aliasing the class-level default (or the
        # caller's list) would let a later mutation leak into other adapters.
        self._cameras_order = list(cameras_order or self.DEFAULT_CAMERAS_ORDER)

    @property
    def name(self) -> str:
        """Identifier of this adapter."""
        return "pano_image_adapter"

    @property
    def consumes_attrs(self) -> set[StandardFrameDataField]:
        """Frame-data fields this adapter reads (only the cameras)."""
        return {StandardFrameDataField.CAMERAS}

    def _transform(self, standard_frame_data: StandardFrameData) -> dict[Modality, Any]:
        """Transform cameras data to a single panoramic image.

        Args:
            standard_frame_data: Frame whose ``cameras`` mapping is indexed by
                camera direction; each entry must expose an ``image`` array.

        Returns:
            ``{Modality.CAMERAS: image}`` with the stitched and transformed
            image, or an empty dict when the frame has no cameras.

        Raises:
            KeyError: If the frame has cameras but one of the configured
                camera directions is missing.
        """
        cameras = standard_frame_data.cameras
        # Datasets without a camera rig (e.g. AV2 lidar) ship an empty
        # ``cameras`` dict; skip silently so the same multi-dataset config can
        # drive both camera-bearing and camera-less sources.
        if not cameras:
            return {}

        image_list = []
        for camera_direction in self._cameras_order:
            try:
                camera = cameras[camera_direction]
            except KeyError as err:
                # Same exception type as a plain lookup, but say which adapter
                # failed and what the frame actually contains.
                raise KeyError(
                    f"{self.name}: camera {camera_direction!r} is missing from "
                    f"the frame; available cameras: {list(cameras)!r}"
                ) from err
            image_list.append(camera.image)

        # AV2 mixes a portrait front-center camera with landscape side cameras;
        # equalise heights before horizontal concat. No-op when all cameras
        # already share a height (e.g. Waymo).
        heights = [img.shape[0] for img in image_list]
        common_height = min(heights)
        if common_height != max(heights):
            image_list = [
                cv2.resize(
                    img,
                    # cv2 takes (width, height); width is scaled by the same
                    # factor as the height to keep the aspect ratio.
                    (
                        int(round(img.shape[1] * common_height / img.shape[0])),
                        common_height,
                    ),
                    # Target height is the minimum, so this only ever shrinks.
                    interpolation=cv2.INTER_AREA,
                ).astype(np.uint8)
                for img in image_list
            ]

        # axis=1 is the width axis: images are laid out left to right.
        concatenated_image = np.concatenate(image_list, axis=1)
        adapted_image = self._image_transform(image=concatenated_image)["image"]
        return {Modality.CAMERAS: adapted_image}