From e7e0b551c297c70ce21ab2a4dc5281909b8829de Mon Sep 17 00:00:00 2001 From: Shivansh Shalabh <65492037+ShivanshShalabh@users.noreply.github.com> Date: Fri, 28 Jun 2024 22:30:13 -0400 Subject: [PATCH] Completed imageToText (#21) --- .../QuickInput/QuickInput.stories.js | 14 ++++- .../Outputs/ImageToText/ImageToText.scss | 7 +++ .../Outputs/ImageToText/ImageToTextOutput.js | 29 ++++++++++ .../ImageToText/ImageToTextOutput.stories.js | 13 +++++ .../ImageToText/ImageToTextOutput.test.js | 0 .../ImageToTextOutputInputSection.js | 29 ++++++++++ .../testData/testImageToTextOutput.js | 29 ++++++++++ .../QuickOutput/Outputs/Text/TextOutput.scss | 4 +- src/helpers/DefaultModels.js | 53 ++++++++++++++++++- src/helpers/Task.js | 36 +++++++++++-- src/helpers/TaskIDs.js | 1 + src/helpers/UppyFileTypeCheckerPlugin.js | 4 +- src/helpers/sampleImages.js | 15 ++++++ src/resources/icons/icon-imageToText.svg | 1 + 14 files changed, 225 insertions(+), 10 deletions(-) create mode 100644 src/components/Experiment/QuickOutput/Outputs/ImageToText/ImageToText.scss create mode 100644 src/components/Experiment/QuickOutput/Outputs/ImageToText/ImageToTextOutput.js create mode 100644 src/components/Experiment/QuickOutput/Outputs/ImageToText/ImageToTextOutput.stories.js create mode 100644 src/components/Experiment/QuickOutput/Outputs/ImageToText/ImageToTextOutput.test.js create mode 100644 src/components/Experiment/QuickOutput/Outputs/ImageToText/ImageToTextOutputInputSection.js create mode 100644 src/components/Experiment/QuickOutput/Outputs/ImageToText/testData/testImageToTextOutput.js create mode 100644 src/resources/icons/icon-imageToText.svg diff --git a/src/components/Experiment/QuickInput/QuickInput.stories.js b/src/components/Experiment/QuickInput/QuickInput.stories.js index 7b3875c8..d4df3478 100644 --- a/src/components/Experiment/QuickInput/QuickInput.stories.js +++ b/src/components/Experiment/QuickInput/QuickInput.stories.js @@ -17,6 +17,7 @@ import { textToImage, textToVideo, imageTo3D, + imageToText, } from "../../../helpers/TaskIDs"; import { SampleImageClassificationInputs, @@ -29,7 +30,8 @@ import { SampleTextGuidedImageToImageInputs, SampleDocumentQuestionAnsweringInputs, SampleTextToImage, - SampleTextToVideo + SampleTextToVideo, + SampleImageToText } from "../../../helpers/sampleImages"; export default { @@ -226,3 +228,13 @@ TextToVideo.args = { }, }, }; + +export const ImageToText = Template.bind({}); +ImageToText.args = { + sampleInputs: SampleImageToText, + model: { + output: { + type: imageToText, + }, + }, +}; diff --git a/src/components/Experiment/QuickOutput/Outputs/ImageToText/ImageToText.scss b/src/components/Experiment/QuickOutput/Outputs/ImageToText/ImageToText.scss new file mode 100644 index 00000000..63dc6721 --- /dev/null +++ b/src/components/Experiment/QuickOutput/Outputs/ImageToText/ImageToText.scss @@ -0,0 +1,7 @@ +.image-to-text-output { + &__input-image-content { + img { + margin-top: 12px; + } + } +} \ No newline at end of file diff --git a/src/components/Experiment/QuickOutput/Outputs/ImageToText/ImageToTextOutput.js b/src/components/Experiment/QuickOutput/Outputs/ImageToText/ImageToTextOutput.js new file mode 100644 index 00000000..437e4bc7 --- /dev/null +++ b/src/components/Experiment/QuickOutput/Outputs/ImageToText/ImageToTextOutput.js @@ -0,0 +1,29 @@ +import React from "react"; +import useBEMNaming from "../../../../../common/useBEMNaming"; +import useTextOutput from "../Text/useTextOutput"; +import ImageToTextOutputInputSection from "./ImageToTextOutputInputSection"; +import 
{ TextOutputBox } from "../Text/TextOutputBox";
+import { imageToText } from "../../../../../helpers/TaskIDs";
+
+export default function ImageToTextOutput(props) {
+  const { getBlock } = useBEMNaming("image-to-text-output");
+  const { output, inferenceDuration, input, setInput } = useTextOutput(
+    props.trial
+  );
+
+  const onSubmit = () => {
+    props.onSubmit(input);
+  };
+
+  return (
+    <div className={getBlock()}>
+      <ImageToTextOutputInputSection input={props.trial.inputs[0]} />
+      <TextOutputBox
+        output={output}
+        inferenceDuration={inferenceDuration}
+        input={input}
+        setInput={setInput}
+        onSubmit={onSubmit}
+        task={imageToText}
+      />
+    </div>
+  );
+}
diff --git a/src/components/Experiment/QuickOutput/Outputs/ImageToText/ImageToTextOutput.stories.js b/src/components/Experiment/QuickOutput/Outputs/ImageToText/ImageToTextOutput.stories.js
new file mode 100644
index 00000000..bd2c38dc
--- /dev/null
+++ b/src/components/Experiment/QuickOutput/Outputs/ImageToText/ImageToTextOutput.stories.js
@@ -0,0 +1,13 @@
+import React from "react";
+import ImageToTextOutput from "./ImageToTextOutput";
+import { TestImageToTextOutput } from "./testData/testImageToTextOutput";
+
+export default {
+  title: "Experiments/Quick Output/Image To Text",
+  component: ImageToTextOutput,
+};
+
+const template = (args) => <ImageToTextOutput {...args} />;
+
+export const Default = template.bind({});
+Default.args = { trial: TestImageToTextOutput };
diff --git a/src/components/Experiment/QuickOutput/Outputs/ImageToText/ImageToTextOutput.test.js b/src/components/Experiment/QuickOutput/Outputs/ImageToText/ImageToTextOutput.test.js
new file mode 100644
index 00000000..e69de29b
diff --git a/src/components/Experiment/QuickOutput/Outputs/ImageToText/ImageToTextOutputInputSection.js b/src/components/Experiment/QuickOutput/Outputs/ImageToText/ImageToTextOutputInputSection.js
new file mode 100644
index 00000000..3f351f8c
--- /dev/null
+++ b/src/components/Experiment/QuickOutput/Outputs/ImageToText/ImageToTextOutputInputSection.js
@@ -0,0 +1,29 @@
+import React from "react";
+import useBEMNaming from "../../../../../common/useBEMNaming";
+import "./ImageToText.scss"
+export default function ImageToTextOutputInputSection(props) {
+  const { getElement } = useBEMNaming("image-to-text-output");
+  const input = props.input;
+
+  return (
+    <div className={getElement("input-image")}>
+      <div className={getElement("input-image-header")}>Input Image</div>
+      <div className={getElement("input-image-content")}>
+        <div>
+          The uploaded image file:
+          {input.alt}
+        </div>
+        <img src={input.src} alt={input.alt} />
+      </div>
+    </div>
+ ); +} diff --git a/src/components/Experiment/QuickOutput/Outputs/ImageToText/testData/testImageToTextOutput.js b/src/components/Experiment/QuickOutput/Outputs/ImageToText/testData/testImageToTextOutput.js new file mode 100644 index 00000000..6fcaa410 --- /dev/null +++ b/src/components/Experiment/QuickOutput/Outputs/ImageToText/testData/testImageToTextOutput.js @@ -0,0 +1,29 @@ +export const TestImageToTextOutputGeneratedToken = { + id: "sampletestimagetotextoutputidhere" +}; + +export const TestImageToTextOutput = { + id: "sampletestimagetotextoutputidhere", + inputs: [ + { + src: "https://s3.amazonaws.com/uploads.staging.mlmodelscope.org/crabby.png", + alt: "crab" + }, + ], + completed_at: "2023-06-03T18:17:14.513854Z", + results: { + 'duration': "9.216154124s", + 'duration_for_inference': "9.193807904s", + 'responses': [ + { + 'features': [ + { + 'text': 'A crab on the beach', + 'type': 'TEXT' + } + ], + 'id': "sampletestimagetotextoutputresponseidhere" + } + ] + } +} \ No newline at end of file diff --git a/src/components/Experiment/QuickOutput/Outputs/Text/TextOutput.scss b/src/components/Experiment/QuickOutput/Outputs/Text/TextOutput.scss index 0562b3b6..3050a1fd 100644 --- a/src/components/Experiment/QuickOutput/Outputs/Text/TextOutput.scss +++ b/src/components/Experiment/QuickOutput/Outputs/Text/TextOutput.scss @@ -2,7 +2,7 @@ .text-output, .text-to-code-output, .audio-to-text-output, .text-to-audio-output, -.text-conversation-output { +.text-conversation-output,.image-to-text-output { display: flex; flex-direction: row; gap: 72px; @@ -42,7 +42,7 @@ } - &-audio-content { + &-audio-content, &-image-content { padding-top: 12px; font-size: 20px; } diff --git a/src/helpers/DefaultModels.js b/src/helpers/DefaultModels.js index 20aec52e..25baaf5a 100644 --- a/src/helpers/DefaultModels.js +++ b/src/helpers/DefaultModels.js @@ -1,4 +1,4 @@ -import { audioToText, documentQuestionAnswering, styleTransfer, textGuidedImageToImage, textToAudio, textToImage, textToText, textToVideo, visualQuestionAnswering } from "./TaskIDs"; +import { audioToText, documentQuestionAnswering, styleTransfer, textGuidedImageToImage, textToAudio, textToImage, textToText, textToVideo, visualQuestionAnswering,imageToText } from "./TaskIDs"; export const DefaultImageClassificationModel = { id: 1, @@ -801,4 +801,55 @@ export const DefaultTextToVideo = { link2: "", }, version: "1.0", +}; +export const DefaultImageToText = { + id: 190, + created_at: "2022-04-29T20:48:47.370171Z", + updated_at: "2022-04-29T20:48:47.370171Z", + attributes: { + Top1: "", + Top5: "", + kind: "CNN", + manifest_author: "Jingning Tang", + training_dataset: "PASCAL VOC 2012", + }, + description: + "TensorFlow Chatbot model, which is trained on the COCO (Common Objects in Context) dataset. Use deeplabv3_mnv2_dm05_pascal_train_aug(deeplabv3_mnv2_dm05_pascal_train_aug_2018_10_01) from TensorFlow DeepLab Model Zoo.\n", + short_description: + "DeepLabv3 is a deep convolutional neural networks for semantic chatbotness. 
It employ atrous convolution in cascade or in parallel to capture multi-scale context by adopting multiple atrous rates.", + model: { + graph_checksum: "0336ceb67b378df8ada0efe9eadb5ac8", + graph_path: + "https://s3.amazonaws.com/store.carml.org/models/tensorflow/models/deeplabv3_mnv2_dm05_pascal_train_aug_2018_10_01/frozen_inference_graph.pb", + weights_checksum: "", + weights_path: "", + }, + framework: { + id: 4, + name: "TensorFlow", + version: "1.14.0", + architectures: [ + { + name: "amd64", + }, + ], + }, + input: { + description: "text to be responded to", + type: "text", + }, + license: "Apache License, Version 2.0", + name: "DeepLabv3_MobileNet_v2_DM_05_PASCAL_VOC_Train_Aug", + output: { + description: "the chatbot's response to the inputted text", + type: imageToText, + }, + url: { + github: + "https://github.com/rai-project/tensorflow/blob/master/builtin_models/DeepLabv3_MobileNet_v2_DM_05_PASCAL_VOC_Train_Aug.yml", + citation: "https://arxiv.org/pdf/1802.02611v3.pdf", + link1: "https://arxiv.org/pdf/1706.05587.pdf", + link2: "", + }, + version: "1.0", }; \ No newline at end of file diff --git a/src/helpers/Task.js b/src/helpers/Task.js index a604ec5b..0d343808 100644 --- a/src/helpers/Task.js +++ b/src/helpers/Task.js @@ -15,7 +15,8 @@ import { textGuidedImageToImage, documentQuestionAnswering, textToImage, - textToVideo + textToVideo, + imageToText } from "./TaskIDs"; import React from "react"; import { ReactComponent as ImageClassification } from "../resources/icons/icon-imageClassification.svg"; @@ -34,6 +35,8 @@ import { ReactComponent as VisualQuestionAnswering } from "../resources/icons/ic import { ReactComponent as TextGuidedImageToImage } from "../resources/icons/icon-textGuidedImagetoImage.svg"; import { ReactComponent as TexttoImage } from "../resources/icons/icon-textToImage.svg"; import { ReactComponent as TexttoVideo } from "../resources/icons/icon-textToVideo.svg"; +import { ReactComponent as ImageToText } from "../resources/icons/icon-imageToText.svg"; + import { DefaultImageClassificationModel, @@ -51,7 +54,8 @@ import { DefaultDocumentQuestionAnsweringModel, DefaultTextToImage, DefaultTextToVideo, - DefaultImageTo3DModel + DefaultImageTo3DModel, + DefaultImageToText } from "./DefaultModels"; import { SampleAudioToTextInputs, @@ -64,7 +68,8 @@ import { SampleVisualQuestionAnsweringInputs, SampleDocumentQuestionAnsweringInputs, SampleTextToImage, - SampleTextToVideo + SampleTextToVideo, + SampleImageToText } from "./sampleImages"; import { TestImageClassificationResult } from "../components/Experiment/QuickOutput/Outputs/Classification/Features"; @@ -85,6 +90,7 @@ import { TestTextGuidedImageToImage } from "../components/Experiment/QuickOutput import { TestTextToImageOutput } from "../components/Experiment/QuickOutput/Outputs/TextToImage/testData/testTextToImageOutput"; import { TestTextToVideoOutput } from "../components/Experiment/QuickOutput/Outputs/TextToVideo/testData/testTextToVideoOutput"; import { TestImageTo3DOutput } from "../components/Experiment/QuickOutput/Outputs/ImageTo3D/testData/testImageTo3DOutput"; +import { TestImageToTextOutput } from "../components/Experiment/QuickOutput/Outputs/ImageToText/testData/testImageToTextOutput"; export default class Task { @@ -213,7 +219,7 @@ export default class Task { inputType: TaskInputTypes.Image, }, ], - + outputText: "3D model generated from the uploaded images", icon: (props) => , sampleInputs: [], @@ -417,6 +423,19 @@ export default class Task { inputType: TaskInputTypes.Text, }); + static image_to_text 
= new Task({ + name: "Image to Text", + description: "Caption an image.", + id: imageToText, + inputText: "Generate a caption for the image.", + outputText: "Caption:", + icon: (props) => , + sampleInputs: SampleImageToText, + tutorialDescription: "Image to Text model generates a caption for an image.", + inputType: TaskInputTypes.Image, + }); + + constructor(options) { this.name = options.name ?? ""; this.id = options.id ?? this.name; @@ -479,6 +498,8 @@ export default class Task { return Task.text_to_image; case textToVideo: return Task.text_to_video; + case imageToText: + return Task.image_to_text; default: return new Task({ name: "unknown", description: "unknown task name" }); } @@ -521,6 +542,8 @@ export default class Task { return DefaultTextToImage; case textToVideo: return DefaultTextToVideo; + case imageToText: + return DefaultImageToText; default: return undefined; } @@ -562,6 +585,8 @@ export default class Task { return TestTextToImageOutput; case textToVideo: return TestTextToVideoOutput; + case imageToText: + return TestImageToTextOutput; default: return undefined; } @@ -586,7 +611,8 @@ export default class Task { this.getStaticTask(textGuidedImageToImage), this.getStaticTask(documentQuestionAnswering), this.getStaticTask(textToImage), - this.getStaticTask(textToVideo) + this.getStaticTask(textToVideo), + this.getStaticTask(imageToText) ]; } diff --git a/src/helpers/TaskIDs.js b/src/helpers/TaskIDs.js index 467a225c..8d815241 100644 --- a/src/helpers/TaskIDs.js +++ b/src/helpers/TaskIDs.js @@ -15,4 +15,5 @@ export const textGuidedImageToImage = "text_guided_image_to_image"; export const documentQuestionAnswering = "document_question_answering"; export const textToImage = "text_to_image" export const textToVideo = "text_to_video" +export const imageToText = "image_to_text" export const pending = "pending"; diff --git a/src/helpers/UppyFileTypeCheckerPlugin.js b/src/helpers/UppyFileTypeCheckerPlugin.js index a602a181..1f59b334 100644 --- a/src/helpers/UppyFileTypeCheckerPlugin.js +++ b/src/helpers/UppyFileTypeCheckerPlugin.js @@ -11,7 +11,8 @@ import { styleTransfer, imageTo3D, textGuidedImageToImage, - visualQuestionAnswering + visualQuestionAnswering, + imageToText } from './TaskIDs'; import fileTypeChecker from "file-type-checker"; @@ -31,6 +32,7 @@ export const getAllowedFileTypes = (task) => { case styleTransfer: case imageTo3D: case instance_segmentation: + case imageToText: return { fileTypes: ['bmp', 'gif', 'ico', 'jpeg', 'pdf', 'png', 'psd'], mimeTypes: ['image/*'] diff --git a/src/helpers/sampleImages.js b/src/helpers/sampleImages.js index d1f83d10..311e110f 100644 --- a/src/helpers/sampleImages.js +++ b/src/helpers/sampleImages.js @@ -215,4 +215,19 @@ export const SampleTextToVideo = [ "Cat and dog playing", "Flower in a garden", "Sunset on a beach" +]; + +export const SampleImageToText = [ + { + src: "https://s3.amazonaws.com/uploads.staging.mlmodelscope.org/birdy.png", + alt: "bird" + }, + { + src: "https://s3.amazonaws.com/uploads.staging.mlmodelscope.org/kitty.png", + alt: "cat" + }, + { + src: "https://s3.amazonaws.com/uploads.staging.mlmodelscope.org/crabby.png", + alt: "crab" + } ]; \ No newline at end of file diff --git a/src/resources/icons/icon-imageToText.svg b/src/resources/icons/icon-imageToText.svg new file mode 100644 index 00000000..0d1cf45e --- /dev/null +++ b/src/resources/icons/icon-imageToText.svg @@ -0,0 +1 @@ + \ No newline at end of file