cvat-ai · omerferhatt · Jan 18, 2024 · Jan 18, 2024 · Apr 30, 2024 · Apr 30, 2024
@@ -146,6 +146,7 @@ interface State {
     activeLabelID: number | null;
     activeTracker: MLModel | null;
     convertMasksToPolygons: boolean;
+    selectedObjectType: ObjectType;
     trackedShapes: TrackedShape[];
     fetching: boolean;
     pointsReceived: boolean;
@@ -235,6 +236,7 @@ export class ToolsControlComponent extends React.PureComponent<Props, State> {
         super(props);
         this.state = {
             convertMasksToPolygons: false,
+            selectedObjectType: ObjectType.SHAPE,
             activeInteractor: props.interactors.length ? props.interactors[0] : null,
             activeTracker: props.trackers.length ? props.trackers[0] : null,
             activeLabelID: props.labels.length ? props.labels[0].id as number : null,
@@ -593,7 +595,7 @@ export class ToolsControlComponent extends React.PureComponent<Props, State> {
         const portals = !activeTracker ?
             [] :
             states
-                .filter((objectState) => objectState.objectType === 'track' && objectState.shapeType === 'rectangle')
+                .filter((objectState) => objectState.objectType === 'track' && (objectState.shapeType === 'rectangle' || objectState.shapeType === 'polygon'))
                 .map((objectState: any): React.ReactPortal | null => {
                     const { clientID } = objectState;
                     const selectorID = `#cvat-objects-sidebar-state-item-${clientID}`;
@@ -822,7 +824,10 @@ export class ToolsControlComponent extends React.PureComponent<Props, State> {
                             job: jobInstance.id,
                         }) as TrackerResults;
 
-                        response.shapes = response.shapes.map(trackedRectangleMapper);
+                        // Rectangle results (first shape has 4 values) still go through trackedRectangleMapper
+                        if (response.shapes[0].length === 4) {
+                            response.shapes = response.shapes.map(trackedRectangleMapper);
+                        }
                         for (let i = 0; i < trackableObjects.clientIDs.length; i++) {
                             const clientID = trackableObjects.clientIDs[i];
                             const shape = response.shapes[i];
@@ -859,15 +864,15 @@ export class ToolsControlComponent extends React.PureComponent<Props, State> {
     }
 
     private async constructFromPoints(): Promise<void> {
-        const { convertMasksToPolygons } = this.state;
+        const { convertMasksToPolygons, selectedObjectType } = this.state;
         const {
             frame, labels, curZOrder, activeLabelID, createAnnotations,
         } = this.props;
 
         if (convertMasksToPolygons) {
             const object = new core.classes.ObjectState({
                 frame,
-                objectType: ObjectType.SHAPE,
+                objectType: selectedObjectType,
                 source: core.enums.Source.SEMI_AUTO,
                 label: labels.find((label) => label.id === activeLabelID as number) as Label,
                 shapeType: ShapeType.POLYGON,
@@ -958,6 +963,29 @@ export class ToolsControlComponent extends React.PureComponent<Props, State> {
         );
     }
 
+    /** Renders the object-type selector offering every ObjectType value except TAG. */
+    private renderObjectTypeBlock(): JSX.Element {
+        const { selectedObjectType } = this.state;
+        // filter() is a no-op when TAG is absent, whereas splice(indexOf(...), 1)
+        // would receive -1 in that case and silently drop the last option.
+        const objectTypes = Object.values(ObjectType).filter((type) => type !== ObjectType.TAG);
+        return (
+            <Row className='cvat-interactors-setups-container'>
+                <Select
+                    value={selectedObjectType}
+                    onChange={(value: ObjectType) => this.setState({ selectedObjectType: value })}
+                >
+                    {objectTypes.map((type) => (
+                        <Select.Option key={type} value={type}>
+                            {type}
+                        </Select.Option>
+                    ))}
+                </Select>
+                <Text>Object Type</Text>
+            </Row>
+        );
+    }
+
     private renderLabelBlock(): JSX.Element {
         const { labels } = this.props;
         const { activeLabelID } = this.state;
@@ -1346,6 +1374,7 @@ export class ToolsControlComponent extends React.PureComponent<Props, State> {
     }
 
     private renderPopoverContent(): JSX.Element {
+        const { convertMasksToPolygons } = this.state;
         return (
             <div className='cvat-tools-control-popover-content'>
                 <Row justify='start'>
@@ -1358,6 +1387,7 @@ export class ToolsControlComponent extends React.PureComponent<Props, State> {
                 <Tabs type='card' tabBarGutter={8}>
                     <Tabs.TabPane key='interactors' tab='Interactors'>
                         {this.renderMasksConvertingBlock()}
+                        {convertMasksToPolygons ? this.renderObjectTypeBlock() : null}
                         {this.renderLabelBlock()}
                         {this.renderInteractorBlock()}
                     </Tabs.TabPane>

diff --git a/serverless/pytorch/omerferhatt/xmem/nuclio/function-gpu.yaml b/serverless/pytorch/omerferhatt/xmem/nuclio/function-gpu.yaml
@@ -0,0 +1,67 @@
+metadata:
+  name: pth-omerferhatt-xmem
+  namespace: cvat
+  annotations:
+    name: XMem
+    type: tracker
+    spec:
+    framework: pytorch
+
+spec:
+  description: Long-Term Object Segmentation with an Atkinson-Shiffrin Memory Model
+  runtime: 'python:3.9'
+  handler: main:handler
+  eventTimeout: 30s
+
+  env:
+    - name: PYTHONPATH
+      value: /opt/nuclio/xmem
+
+  build:
+    image: cvat.pth.omerferhatt.xmem:latest-gpu
+    baseImage: nvidia/cuda:12.1.0-runtime-ubuntu22.04
+
+    directives:
+      preCopy:
+        - kind: RUN
+          value: |-
+            apt update \
+              && apt install -y --no-install-recommends \
+                wget \
+                git \
+                ca-certificates \
+                python-is-python3 \
+                python3 \
+                python3-pip \
+              && rm -rf /var/lib/apt/lists/*
+        - kind: WORKDIR
+          value: /opt/nuclio
+        - kind: RUN
+          value: git clone --branch main https://github.com/omerferhatt/XMem xmem
+        - kind: RUN
+          value: pip install opencv-python-headless jsonpickle
+        - kind: RUN
+          value: |-
+            pip install torch torchvision
+        - kind: RUN
+          value: wget 'https://www.dropbox.com/scl/fi/5m1l747p15qzgq023e0q9/xmem.pth?rlkey=ss2kjaq4qlvvk5juucyvtmrh8&dl=0' -O '/xmem.pth'
+
+  triggers:
+    myHttpTrigger:
+      maxWorkers: 1
+      kind: 'http'
+      workerAvailabilityTimeoutMilliseconds: 10000
+      attributes:
+        # Value estimated from tracking 100 objects at the same time on a 4K image
+        maxRequestBodySize: 1073741824 # 1GB
+
+  resources:
+    limits:
+      nvidia.com/gpu: 1
+
+  platform:
+    attributes:
+      restartPolicy:
+        name: always
+        maximumRetryCount: 3
+      mountMode: volume
diff --git a/serverless/pytorch/omerferhatt/xmem/nuclio/function.yaml b/serverless/pytorch/omerferhatt/xmem/nuclio/function.yaml
@@ -0,0 +1,63 @@
+metadata:
+  name: pth-omerferhatt-xmem
+  namespace: cvat
+  annotations:
+    name: XMem
+    type: tracker
+    spec:
+    framework: pytorch
+
+spec:
+  description: Long-Term Object Segmentation with an Atkinson-Shiffrin Memory Model
+  runtime: 'python:3.9'
+  handler: main:handler
+  eventTimeout: 30s
+
+  env:
+    - name: PYTHONPATH
+      value: /opt/nuclio/xmem
+
+  build:
+    image: cvat.pth.omerferhatt.xmem
+    baseImage: ubuntu:22.04
+
+    directives:
+      preCopy:
+        - kind: RUN
+          value: |-
+            apt update \
+              && apt install -y --no-install-recommends \
+                wget \
+                git \
+                ca-certificates \
+                python-is-python3 \
+                python3 \
+                python3-pip \
+              && rm -rf /var/lib/apt/lists/*
+        - kind: WORKDIR
+          value: /opt/nuclio
+        - kind: RUN
+          value: git clone --branch main https://github.com/omerferhatt/XMem xmem
+        - kind: RUN
+          value: pip install opencv-python-headless jsonpickle
+        - kind: RUN
+          value: |-
+            pip install torch torchvision --extra-index-url https://download.pytorch.org/whl/cpu
+        - kind: RUN
+          value: wget 'https://www.dropbox.com/scl/fi/5m1l747p15qzgq023e0q9/xmem.pth?rlkey=ss2kjaq4qlvvk5juucyvtmrh8&dl=0' -O '/xmem.pth'
+
+  triggers:
+    myHttpTrigger:
+      maxWorkers: 1
+      kind: 'http'
+      workerAvailabilityTimeoutMilliseconds: 10000
+      attributes:
+        # Value estimated from tracking 100 objects at the same time on a 4K image
+        maxRequestBodySize: 1073741824 # 1GB
+
+  platform:
+    attributes:
+      restartPolicy:
+        name: always
+        maximumRetryCount: 3
+      mountMode: volume
diff --git a/serverless/pytorch/omerferhatt/xmem/nuclio/main.py b/serverless/pytorch/omerferhatt/xmem/nuclio/main.py
@@ -0,0 +1,42 @@
+import base64
+import io
+import json
+
+import numpy as np
+from model_handler import ModelHandler
+from PIL import Image
+
+
+def init_context(context):
+    context.logger.info("Init context...  0%")
+    model = ModelHandler()
+    context.user_data.model = model
+    context.logger.info("Init context...100%")
+
+
+def handler(context, event):
+    context.logger.info("Run XMem model")
+    data = event.body
+    buf = io.BytesIO(base64.b64decode(data["image"]))
+    shapes = data.get("shapes")
+    states = data.get("states")
+
+    image = Image.open(buf).convert("RGB")
+    image = np.array(image)[:, :, ::-1].copy()
+    results = {"shapes": [], "states": []}
+    for i, shape in enumerate(shapes):
+        context.logger.info(f"Inference [{i}] started")
+
+        shape, state = context.user_data.model.infer(
+            image, shape, states[i] if i < len(states) else None
+        )
+        results["shapes"].append(shape)
+        results["states"].append(state)
+        context.logger.info(f"Inference [{i}] finised")
+
+    return context.Response(
+        body=json.dumps(results),
+        headers={},
+        content_type="application/json",
+        status_code=200,
+    )