-
Notifications
You must be signed in to change notification settings - Fork 3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Polygon tracking feature with XMem tracker #7829
Changes from all commits
3eaa96f
c61403b
e6bf576
62f4b4f
da065eb
afc3f12
c4e2fb6
8c1a668
312e709
bb24cfa
df826f4
46000b1
791ce59
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -146,6 +146,7 @@ interface State { | |
activeLabelID: number | null; | ||
activeTracker: MLModel | null; | ||
convertMasksToPolygons: boolean; | ||
selectedObjectType: ObjectType; | ||
trackedShapes: TrackedShape[]; | ||
fetching: boolean; | ||
pointsReceived: boolean; | ||
|
@@ -235,6 +236,7 @@ export class ToolsControlComponent extends React.PureComponent<Props, State> { | |
super(props); | ||
this.state = { | ||
convertMasksToPolygons: false, | ||
selectedObjectType: ObjectType.SHAPE, | ||
activeInteractor: props.interactors.length ? props.interactors[0] : null, | ||
activeTracker: props.trackers.length ? props.trackers[0] : null, | ||
activeLabelID: props.labels.length ? props.labels[0].id as number : null, | ||
|
@@ -593,7 +595,7 @@ export class ToolsControlComponent extends React.PureComponent<Props, State> { | |
const portals = !activeTracker ? | ||
[] : | ||
states | ||
.filter((objectState) => objectState.objectType === 'track' && objectState.shapeType === 'rectangle') | ||
.filter((objectState) => objectState.objectType === 'track' && (objectState.shapeType === 'rectangle' || objectState.shapeType === 'polygon')) | ||
.map((objectState: any): React.ReactPortal | null => { | ||
const { clientID } = objectState; | ||
const selectorID = `#cvat-objects-sidebar-state-item-${clientID}`; | ||
|
@@ -822,7 +824,10 @@ export class ToolsControlComponent extends React.PureComponent<Props, State> { | |
job: jobInstance.id, | ||
}) as TrackerResults; | ||
|
||
response.shapes = response.shapes.map(trackedRectangleMapper); | ||
// If shape type is rectangle, keep same approach | ||
if (response.shapes[0].length === 4) { | ||
response.shapes = response.shapes.map(trackedRectangleMapper); | ||
} | ||
for (let i = 0; i < trackableObjects.clientIDs.length; i++) { | ||
const clientID = trackableObjects.clientIDs[i]; | ||
const shape = response.shapes[i]; | ||
|
@@ -859,15 +864,15 @@ export class ToolsControlComponent extends React.PureComponent<Props, State> { | |
} | ||
|
||
private async constructFromPoints(): Promise<void> { | ||
const { convertMasksToPolygons } = this.state; | ||
const { convertMasksToPolygons, selectedObjectType } = this.state; | ||
const { | ||
frame, labels, curZOrder, activeLabelID, createAnnotations, | ||
} = this.props; | ||
|
||
if (convertMasksToPolygons) { | ||
const object = new core.classes.ObjectState({ | ||
frame, | ||
objectType: ObjectType.SHAPE, | ||
objectType: selectedObjectType, | ||
source: core.enums.Source.SEMI_AUTO, | ||
label: labels.find((label) => label.id === activeLabelID as number) as Label, | ||
shapeType: ShapeType.POLYGON, | ||
|
@@ -958,6 +963,29 @@ export class ToolsControlComponent extends React.PureComponent<Props, State> { | |
); | ||
} | ||
|
||
/**
 * Renders the "Object Type" selector shown in the interactors tab.
 *
 * The selected value is kept in `state.selectedObjectType`.
 */
private renderObjectTypeBlock(): JSX.Element {
    const { selectedObjectType } = this.state;
    // Only shapes and tracks are offered. The supported types are listed
    // explicitly (instead of removing TAG from every enum value) so that a
    // newly added ObjectType member does not silently show up here.
    const objectTypes: ObjectType[] = [ObjectType.SHAPE, ObjectType.TRACK];
    return (
        <Row className='cvat-interactors-setups-container'>
            <Select
                value={selectedObjectType}
                onChange={(value: ObjectType) => {
                    this.setState({ selectedObjectType: value });
                }}
            >
                {objectTypes.map((type) => (
                    <Select.Option key={type} value={type}>
                        {type}
                    </Select.Option>
                ))}
            </Select>
            <Text>Object Type</Text>
        </Row>
    );
}
|
||
private renderLabelBlock(): JSX.Element { | ||
const { labels } = this.props; | ||
const { activeLabelID } = this.state; | ||
|
@@ -1346,6 +1374,7 @@ export class ToolsControlComponent extends React.PureComponent<Props, State> { | |
} | ||
|
||
private renderPopoverContent(): JSX.Element { | ||
const { convertMasksToPolygons } = this.state; | ||
return ( | ||
<div className='cvat-tools-control-popover-content'> | ||
<Row justify='start'> | ||
|
@@ -1358,6 +1387,7 @@ export class ToolsControlComponent extends React.PureComponent<Props, State> { | |
<Tabs type='card' tabBarGutter={8}> | ||
<Tabs.TabPane key='interactors' tab='Interactors'> | ||
{this.renderMasksConvertingBlock()} | ||
{convertMasksToPolygons ? this.renderObjectTypeBlock() : null} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
{this.renderLabelBlock()} | ||
{this.renderInteractorBlock()} | ||
</Tabs.TabPane> | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
# XMem tracker function config, GPU variant (the spec section below uses a CUDA
# base image and requests one nvidia.com/gpu).
metadata: | ||
name: pth-omerferhatt-xmem | ||
namespace: cvat | ||
annotations: | ||
name: XMem | ||
type: tracker | ||
spec: | ||
framework: pytorch | ||
|
||
spec: | ||
description: Long-Term Object Segmentation with an Atkinson-Shiffrin Memory Model | ||
runtime: 'python:3.9' | ||
handler: main:handler | ||
eventTimeout: 30s | ||
|
||
env: | ||
- name: PYTHONPATH | ||
value: /opt/nuclio/xmem | ||
|
||
build: | ||
image: cvat.pth.omerferhatt.xmem:latest-gpu | ||
baseImage: nvidia/cuda:12.1.0-runtime-ubuntu22.04 | ||
|
||
directives: | ||
preCopy: | ||
- kind: RUN | ||
value: |- | ||
apt update \ | ||
&& apt install -y --no-install-recommends \ | ||
wget \ | ||
git \ | ||
ca-certificates \ | ||
python-is-python3 \ | ||
python3 \ | ||
python3-pip \ | ||
&& rm -rf /var/lib/apt/lists/* | ||
- kind: WORKDIR | ||
value: /opt/nuclio | ||
- kind: RUN | ||
value: git clone --branch main https://github.com/omerferhatt/XMem xmem | ||
There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. Why not use the original repository? There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. Also, it is better to use a specific tag. |
||
- kind: RUN | ||
value: pip install opencv-python-headless jsonpickle | ||
- kind: RUN | ||
value: |- | ||
pip install torch torchvision | ||
Comment on lines
+42
to
+45
There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. Usually it is a good idea to pin dependency versions. Otherwise, it may eventually turn out that the image can no longer be built. |
||
- kind: RUN | ||
value: wget 'https://www.dropbox.com/scl/fi/5m1l747p15qzgq023e0q9/xmem.pth?rlkey=ss2kjaq4qlvvk5juucyvtmrh8&dl=0' -O '/xmem.pth' | ||
There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. I do not think we should put the file in the root directory. There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. I do not think Dropbox is reliable enough. Also, it is not clear who owns the weights or what their license is. |
||
|
||
triggers: | ||
myHttpTrigger: | ||
maxWorkers: 1 | ||
kind: 'http' | ||
workerAvailabilityTimeoutMilliseconds: 10000 | ||
attributes: | ||
# Set value from the calculation of tracking of 100 objects at the same time on a 4k image | ||
maxRequestBodySize: 1073741824 # 1GB | ||
There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. Is there a reason to send a 1 GB body within an HTTP request? |
||
|
||
resources: | ||
limits: | ||
nvidia.com/gpu: 1 | ||
|
||
platform: | ||
attributes: | ||
restartPolicy: | ||
name: always | ||
maximumRetryCount: 3 | ||
mountMode: volume |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
# XMem tracker function config, CPU variant: built on ubuntu:22.04 with torch
# installed using the PyTorch CPU wheel index as an extra index.
metadata: | ||
name: pth-omerferhatt-xmem | ||
namespace: cvat | ||
annotations: | ||
name: XMem | ||
type: tracker | ||
spec: | ||
framework: pytorch | ||
|
||
spec: | ||
description: Long-Term Object Segmentation with an Atkinson-Shiffrin Memory Model | ||
runtime: 'python:3.9' | ||
handler: main:handler | ||
eventTimeout: 30s | ||
|
||
env: | ||
- name: PYTHONPATH | ||
value: /opt/nuclio/xmem | ||
|
||
build: | ||
image: cvat.pth.omerferhatt.xmem | ||
baseImage: ubuntu:22.04 | ||
|
||
directives: | ||
preCopy: | ||
- kind: RUN | ||
value: |- | ||
apt update \ | ||
&& apt install -y --no-install-recommends \ | ||
wget \ | ||
git \ | ||
ca-certificates \ | ||
python-is-python3 \ | ||
python3 \ | ||
python3-pip \ | ||
&& rm -rf /var/lib/apt/lists/* | ||
- kind: WORKDIR | ||
value: /opt/nuclio | ||
# NOTE(review): clones the moving `main` branch of a fork into ./xmem (the
# directory PYTHONPATH points at); TODO pin a tag or commit for reproducible builds.
- kind: RUN | ||
value: git clone --branch main https://github.com/omerferhatt/XMem xmem | ||
# NOTE(review): the pip packages in this step and the next are unpinned; TODO pin versions.
- kind: RUN | ||
value: pip install opencv-python-headless jsonpickle | ||
- kind: RUN | ||
value: |- | ||
pip install torch torchvision --extra-index-url https://download.pytorch.org/whl/cpu | ||
# NOTE(review): weights are downloaded from a Dropbox link into the filesystem
# root (/xmem.pth); ownership and license of the weights are unconfirmed.
- kind: RUN | ||
value: wget 'https://www.dropbox.com/scl/fi/5m1l747p15qzgq023e0q9/xmem.pth?rlkey=ss2kjaq4qlvvk5juucyvtmrh8&dl=0' -O '/xmem.pth' | ||
|
||
triggers: | ||
myHttpTrigger: | ||
maxWorkers: 1 | ||
kind: 'http' | ||
workerAvailabilityTimeoutMilliseconds: 10000 | ||
attributes: | ||
# Set value from the calculation of tracking of 100 objects at the same time on a 4k image | ||
maxRequestBodySize: 1073741824 # 1GB | ||
|
||
platform: | ||
attributes: | ||
restartPolicy: | ||
name: always | ||
maximumRetryCount: 3 | ||
mountMode: volume |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
import base64 | ||
import io | ||
import json | ||
|
||
import numpy as np | ||
from model_handler import ModelHandler | ||
from PIL import Image | ||
|
||
|
||
def init_context(context):
    """Create the model handler once and store it on ``context.user_data.model``."""
    log = context.logger
    log.info("Init context... 0%")
    # Stored on user_data so that handler() can reach the model on each call.
    context.user_data.model = ModelHandler()
    log.info("Init context...100%")
|
||
|
||
def handler(context, event):
    """Run the XMem model on one image for every shape in the request.

    Reads three keys from ``event.body`` (assumed to be an already-parsed
    mapping -- TODO confirm against the caller):

    * ``"image"``  -- base64-encoded image bytes (required).
    * ``"shapes"`` -- list of shapes to track; missing/null means no shapes.
    * ``"states"`` -- list of per-shape tracker states; missing/null means none.

    Returns a ``context.Response`` with status 200 whose JSON body is
    ``{"shapes": [...], "states": [...]}``, one entry per input shape.
    """
    context.logger.info("Run XMem model")
    data = event.body
    buf = io.BytesIO(base64.b64decode(data["image"]))
    # dict.get returns None for an absent key; fall back to empty lists so the
    # loop and the len() check below do not raise TypeError.
    shapes = data.get("shapes") or []
    states = data.get("states") or []

    image = Image.open(buf).convert("RGB")
    # Reverse the channel axis of the RGB array and materialize it as a new array.
    image = np.array(image)[:, :, ::-1].copy()
    results = {"shapes": [], "states": []}
    for i, shape in enumerate(shapes):
        context.logger.info(f"Inference [{i}] started")

        # A shape without a matching state entry is passed state=None.
        previous_state = states[i] if i < len(states) else None
        tracked_shape, state = context.user_data.model.infer(
            image, shape, previous_state
        )
        results["shapes"].append(tracked_shape)
        results["states"].append(state)
        context.logger.info(f"Inference [{i}] finished")

    return context.Response(
        body=json.dumps(results),
        headers={},
        content_type="application/json",
        status_code=200,
    )
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If you mean that this code supports only `shape` and `track`, you should write exactly that.
Otherwise, when one more object type is added, this part will break automatically and nobody will know about it.