Source code for standard_e2e.caching.adapters.pano_adapter

from typing import Any

import albumentations as A
import cv2
import numpy as np

from standard_e2e.caching.adapters.abstract_adapter import AbstractAdapter
from standard_e2e.data_structures.frame_data import StandardFrameData
from standard_e2e.enums import CameraDirection, Modality, StandardFrameDataField
from standard_e2e.utils.image_utils import CropTop



[docs]
class PanoImageAdapter(AbstractAdapter):
    """Stitch several camera images into one panoramic image.

    The images of the configured cameras are concatenated left-to-right in
    ``cameras_order``. If their heights differ, every image is first rescaled
    to the smallest height (keeping its aspect ratio). The stitched image is
    then passed through ``CropTop`` followed by ``A.LongestMaxSize``.

    Frames whose ``cameras`` mapping is empty yield an empty result, so the
    same configuration can be used for datasets with and without cameras.
    """

    DEFAULT_CAMERAS_ORDER = [
        CameraDirection.FRONT_LEFT,
        CameraDirection.FRONT,
        CameraDirection.FRONT_RIGHT,
    ]

    def __init__(
        self,
        top_cut_frac: float = 0.0,
        max_size: int = 640,
        cameras_order: list[CameraDirection] | None = None,
    ):
        """Configure the panorama layout and the post-processing transform.

        Args:
            top_cut_frac: Forwarded to ``CropTop``; presumably the fraction of
                the image height removed from the top (confirm in
                ``standard_e2e.utils.image_utils``).
            max_size: Forwarded to ``A.LongestMaxSize`` as the target size of
                the longest side of the stitched image.
            cameras_order: Left-to-right order of the cameras in the panorama.
                ``None`` or an empty list selects ``DEFAULT_CAMERAS_ORDER``.
        """
        super().__init__()
        self._image_transform = A.Compose(
            [
                CropTop(top_cut_frac=top_cut_frac),
                A.LongestMaxSize(max_size=max_size, p=1.0),
            ]
        )
        # Copy so that later mutation of the caller's list (or of the shared
        # class-level default) cannot silently change this adapter's layout.
        self._cameras_order = list(cameras_order or self.DEFAULT_CAMERAS_ORDER)

    @property
    def name(self) -> str:
        """Identifier of this adapter."""
        return "pano_image_adapter"

    @property
    def consumes_attrs(self) -> set[StandardFrameDataField]:
        """Frame-data fields this adapter reads (only the cameras)."""
        return {StandardFrameDataField.CAMERAS}

    @staticmethod
    def _equalize_heights(image_list: list[np.ndarray]) -> list[np.ndarray]:
        """Rescale images to the smallest height among them, keeping aspect ratio.

        Returns the input list unchanged when all heights already match.
        """
        heights = [img.shape[0] for img in image_list]
        common_height = min(heights)
        if common_height == max(heights):
            return image_list
        resized = []
        for img in image_list:
            # cv2.resize takes (width, height); clamp the width to 1 so that a
            # degenerate aspect ratio cannot request a zero-width output.
            new_width = max(
                1, int(round(img.shape[1] * common_height / img.shape[0]))
            )
            resized.append(
                cv2.resize(
                    img,
                    (new_width, common_height),
                    interpolation=cv2.INTER_AREA,
                ).astype(np.uint8)
            )
        return resized

    def _transform(self, standard_frame_data: StandardFrameData) -> dict[Modality, Any]:
        """Transform cameras data to a single panoramic image.

        Args:
            standard_frame_data: Frame whose ``cameras`` mapping is indexed by
                ``CameraDirection`` and whose entries expose an ``image``.

        Returns:
            ``{Modality.CAMERAS: image}`` with the stitched and transformed
            panorama, or an empty dict when the frame carries no cameras.

        Raises:
            KeyError: If the frame has cameras but lacks one or more of the
                directions listed in ``cameras_order``.
        """
        cameras = standard_frame_data.cameras
        # Datasets without a camera rig (e.g. AV2 lidar) ship an empty
        # ``cameras`` dict; skip silently so the same multi-dataset config can
        # drive both camera-bearing and camera-less sources.
        if not cameras:
            return {}
        # A partially populated rig is a configuration/data error: fail with a
        # message that names the adapter and the missing directions instead of
        # a bare ``KeyError(<enum>)``.
        missing = [
            camera_direction
            for camera_direction in self._cameras_order
            if camera_direction not in cameras
        ]
        if missing:
            raise KeyError(
                f"{self.name}: frame is missing camera(s) {missing} required by "
                f"cameras_order; available cameras: {list(cameras)}"
            )
        image_list = [
            cameras[camera_direction].image
            for camera_direction in self._cameras_order
        ]
        # AV2 mixes a portrait front-center camera with landscape side cameras;
        # equalise heights before horizontal concat. No-op when all cameras
        # already share a height (e.g. Waymo).
        image_list = self._equalize_heights(image_list)
        concatenated_image = np.concatenate(image_list, axis=1)
        adapted_image = self._image_transform(image=concatenated_image)["image"]
        return {Modality.CAMERAS: adapted_image}