From 0148bb6ec71c36111c21fdc0480e7774e2eb3eb8 Mon Sep 17 00:00:00 2001 From: Valay Dave Date: Mon, 24 Jan 2022 20:43:39 -0800 Subject: [PATCH] Stacked @card v1.2 : Customizing `@card` with `current.card` (#894) * allow passing userland `MetaflowCardComponents` - Added api for `current.cards` that handles multiple decorators * `current.cards` with `id` support. - Introduced `ALLOW_USER_COMPONENTS` attribute to `MetaflowCard` class - setting `ALLOW_USER_COMPONENTS=True` allow editable cards - Modified logic of `CardComponentCollector` - `CardComponentCollector.append` only accessible to default editble card * Added `customize=True` in `@card` - `customize=True` only allowed for one @card deco per @step - `customize=True` sets the card to be default editable - `current.card.append` appends to @card with `customize=True` * Stacked @card v1.2: Read cli changes for Supporting Multiple`@card`s (#895) * Added the `--id` option to `card view`/`card get` - modified datastore and fixed the exception class. - Allowing some ambiguity in pathspec argument for `get/view` command. * Added `id` argument to `get_cards` * Bringing `card list` cli functionality from test-suite branch - added functionality to list cards about a task and even list it as JSON. * changed hash checking logic. - instead of equating we check `startswith` * Added list many feature for `card list` - listing all cards from the latest run when card list is called with no argument * Added `card list` print formatting changes - `--as-json` works for many cards and single cards * Stacked @card v1.2 : New `MetaflowCardComponent`s (#896) * user-facing `MetaflowCardComponent`s - import via `from metaflow.cards import Artifact,..` * 7 Major Changes: - Removed `Section` component from `metaflow.cards` - Making user-facing component inherent to `UserComponent` which in turn inherits `MetaflowCardComponent` - Wrap all `UserComponents` inside a section after rendering everything per card - created a `render_safely` decorator to ensure fail-safe render of `UserComponent`s - removed code from component serializer which used internal components - Refactored some components that return render - Added docstrings to all components. * JS + CSS + Cards UI Build * Stacked @card v1.2 : Graph Related Changes to card cli (#911) * accomodating changes from Netflix/metaflow#833 - Minor tweaks to `graph.py` * Stacked @card v1.2 : Namespace Packages with `@card` (#897) * setup import of cards from `metaflow_extensions` using `metaflow.extension_support` - Added import modules in `card_modules` - Added hook in card decorators to add custom packages * Added some debug statements for external imports. * Stacked @card v1.2 : Test cases for Multiple `@card`s (#898) * Multiple Cards Test Suite Mods (#27) - Added `list_cards` to `CliCheck` and `MetadataCheck` - Bringing Netflix/metaflow#875 into the code - Added a card that prints taskspec with random number * Added test case for multiple cards * Added tests card, summary : - `current.cards['myid']` should be accessible when cards have an `id` argument in decorator - `current.cards.append` should not work when there are no single default editable card. - if a card has `ALLOW_USER_COMPONENTS=False` then it can still be edited via accessing it with `id` property. - adding arbitrary information to `current.cards.append` should not break user code. - Only cards with `ALLOW_USER_COMPONENTS=True` are considered default editable. - If a single @card decorator is present with `id` then it `current.cards.append` should still work - Access of `current.cards` with non existant id should not fail. - `current.cards.append` should be accessible to the card with `customize=True`. - * fixed `DefaultEditableCardTest` test case - Fixed comment - Fixed the `customize=True` test case. * ensure `test_pathspec_card` has no duplicates - ensure entropy of rendered information is high enough to not overwrite a file. * test case fix : `current.cards` to `current.card` * Added Test case to support import of cards. - Test case validates that broken card modules don't break metaflow - test case validates that we are able to import cards from metaflow_extensions - Test case validate that cards can be editable if they are importable. * Added Env var to tests to avoid warnings added to cards. * Added Test for card resume. * Stacked @card v1.2: Card Dev Docs (#899) Co-authored-by: Brendan Gibson Co-authored-by: Brendan Gibson <93726128+obgibson@users.noreply.github.com> Co-authored-by: adam <203779+seethroughdev@users.noreply.github.com> Co-authored-by: Romain Cledat --- docs/cards.md | 306 +++ metaflow/cards.py | 18 +- metaflow/graph.py | 2 + metaflow/metaflow_config.py | 9 +- metaflow/plugins/__init__.py | 21 +- metaflow/plugins/cards/card_cli.py | 393 +++- metaflow/plugins/cards/card_client.py | 41 +- metaflow/plugins/cards/card_datastore.py | 78 +- metaflow/plugins/cards/card_decorator.py | 161 +- .../plugins/cards/card_modules/__init__.py | 111 + metaflow/plugins/cards/card_modules/base.html | 9 +- metaflow/plugins/cards/card_modules/basic.py | 251 ++- .../plugins/cards/card_modules/bundle.css | 71 +- metaflow/plugins/cards/card_modules/card.py | 8 + .../plugins/cards/card_modules/components.py | 408 ++++ .../card_modules/convert_to_native_type.py | 38 +- metaflow/plugins/cards/card_modules/main.js | 10 +- .../cards/card_modules/renderer_tools.py | 49 + .../plugins/cards/card_modules/test_cards.py | 63 + metaflow/plugins/cards/card_resolver.py | 5 +- .../plugins/cards/component_serializer.py | 346 +++ metaflow/plugins/cards/exception.py | 64 +- metaflow/plugins/cards/ui/README.md | 2 +- metaflow/plugins/cards/ui/cypress.json | 7 + .../cards/ui/cypress/fixtures/example.json | 5 + .../cards/ui/cypress/integration/demo_spec.ts | 123 ++ .../ui/cypress/integration/utils_spec.ts | 60 + .../plugins/cards/ui/cypress/plugins/index.js | 19 + .../cards/ui/cypress/support/commands.js | 25 + .../plugins/cards/ui/cypress/support/index.js | 20 + metaflow/plugins/cards/ui/package.json | 42 +- .../plugins/cards/ui/public/card-example.json | 78 +- metaflow/plugins/cards/ui/public/index.html | 2 +- metaflow/plugins/cards/ui/rollup.config.js | 13 +- metaflow/plugins/cards/ui/src/App.svelte | 8 +- metaflow/plugins/cards/ui/src/app.css | 59 +- .../ui/src/components/artifact-row.svelte | 15 +- .../cards/ui/src/components/artifacts.svelte | 21 +- .../cards/ui/src/components/aside-nav.svelte | 4 +- .../cards/ui/src/components/bar-chart.svelte | 4 +- .../components/card-component-renderer.svelte | 28 +- .../cards/ui/src/components/dag/dag.svelte | 10 +- .../cards/ui/src/components/heading.svelte | 2 +- .../cards/ui/src/components/image.svelte | 2 +- .../cards/ui/src/components/line-chart.svelte | 4 +- .../cards/ui/src/components/log.svelte | 3 +- .../cards/ui/src/components/markdown.svelte | 9 + .../cards/ui/src/components/modal.svelte | 32 +- .../cards/ui/src/components/page.svelte | 2 +- .../cards/ui/src/components/section.svelte | 7 +- .../cards/ui/src/components/subtitle.svelte | 2 +- .../src/components/table-data-renderer.svelte | 44 + .../ui/src/components/table-horizontal.svelte | 6 +- .../ui/src/components/table-vertical.svelte | 5 +- .../cards/ui/src/components/text.svelte | 2 +- .../cards/ui/src/components/title.svelte | 2 +- metaflow/plugins/cards/ui/src/constants.ts | 60 +- metaflow/plugins/cards/ui/src/global.css | 2 +- metaflow/plugins/cards/ui/src/main.ts | 7 +- metaflow/plugins/cards/ui/src/store.ts | 15 +- metaflow/plugins/cards/ui/src/types.ts | 21 +- metaflow/plugins/cards/ui/src/utils.ts | 74 +- metaflow/plugins/cards/ui/tsconfig.json | 6 +- metaflow/plugins/cards/ui/yarn.lock | 1938 +++++++++++++++-- .../plugins/cards/brokencard/__init__.py | 13 + .../plugins/cards/simplecard/__init__.py | 23 + test/core/metaflow_test/__init__.py | 3 + test/core/metaflow_test/cli_check.py | 49 +- test/core/metaflow_test/metadata_check.py | 51 +- test/core/tests/card_default_editable.py | 122 ++ .../tests/card_default_editable_customize.py | 99 + .../tests/card_default_editable_with_id.py | 107 + test/core/tests/card_id_append.py | 86 + test/core/tests/card_import.py | 132 ++ test/core/tests/card_multiple.py | 97 + test/core/tests/card_resume.py | 49 + 76 files changed, 5446 insertions(+), 637 deletions(-) create mode 100644 docs/cards.md create mode 100644 metaflow/plugins/cards/card_modules/components.py create mode 100644 metaflow/plugins/cards/card_modules/renderer_tools.py create mode 100644 metaflow/plugins/cards/component_serializer.py create mode 100644 metaflow/plugins/cards/ui/cypress.json create mode 100644 metaflow/plugins/cards/ui/cypress/fixtures/example.json create mode 100644 metaflow/plugins/cards/ui/cypress/integration/demo_spec.ts create mode 100644 metaflow/plugins/cards/ui/cypress/integration/utils_spec.ts create mode 100644 metaflow/plugins/cards/ui/cypress/plugins/index.js create mode 100644 metaflow/plugins/cards/ui/cypress/support/commands.js create mode 100644 metaflow/plugins/cards/ui/cypress/support/index.js create mode 100644 metaflow/plugins/cards/ui/src/components/markdown.svelte create mode 100644 metaflow/plugins/cards/ui/src/components/table-data-renderer.svelte create mode 100644 test/core/metaflow_extensions/test_org/plugins/cards/brokencard/__init__.py create mode 100644 test/core/metaflow_extensions/test_org/plugins/cards/simplecard/__init__.py create mode 100644 test/core/tests/card_default_editable.py create mode 100644 test/core/tests/card_default_editable_customize.py create mode 100644 test/core/tests/card_default_editable_with_id.py create mode 100644 test/core/tests/card_id_append.py create mode 100644 test/core/tests/card_import.py create mode 100644 test/core/tests/card_multiple.py create mode 100644 test/core/tests/card_resume.py diff --git a/docs/cards.md b/docs/cards.md new file mode 100644 index 00000000000..cf3a5d9fe02 --- /dev/null +++ b/docs/cards.md @@ -0,0 +1,306 @@ +# Metaflow Cards + +Metaflow Cards make it possible to produce human-readable report cards automatically from any Metaflow tasks. You can use the feature to observe results of Metaflow runs, visualize models, and share outcomes with non-technical stakeholders. + +While Metaflow comes with a built-in default card that shows all outputs of a task without any changes in the code, the most exciting use cases are enabled by custom cards: With a few additional lines of Python code, you can change the structure and the content of the report to highlight data that matters to you. For more flexible or advanced reports, you can create custom card templates that generate arbitrary HTML. + +Anyone can create card templates and share them as standard Python packages. Cards can be accessed via the Metaflow CLI even without an internet connection, making it possible to use them in security-conscious environments. Cards are also integrated with the latest release of the Metaflow GUI, allowing you to enrich the existing task view with application-specific information. + +## Technical Details + +### Table Of Contents +* [@card decorator](#card-decorator) + * [Parameters](#parameters) + * [Usage Semantics](#usage-semantics) +* [CardDatastore](#carddatastore) +* [Card CLI](#card-cli) +* [Access cards in notebooks](#access-cards-in-notebooks) +* [MetaflowCard](#metaflowcard) + * [Attributes](#attributes) + * [__init__ Parameters](#__init__-parameters) +* [MetaflowCardComponent](#metaflowcardcomponent) +* [DefaultCard](#defaultcard) +* [Default MetaflowCardComponent](#default-metaflowcardcomponent) +* [Editing MetaflowCard from @step code](#editing-metaflowcard-from-step-code) + * [current.card (CardComponentCollector)](#currentcard-cardcomponentcollector) +* [Creating Custom Installable Cards](#creating-custom-cards) + +Metaflow cards can be created by placing an [`@card` decorator](#@card-decorator) over a `@step`. Cards are created after a metaflow task ( instantiation of each `@step` ) completes execution. You can have multiple `@card` decorators for an individual `@step`. Each decorator takes a `type` argument which defaults to the value `default`. The `type` argument corresponds the [MetaflowCard.type](#metaflowcard). On task completion ,every `@card` decorator creates a separate subprocess to call the [card create cli command](#card-cli). This command will create and [store](#carddatastore) the HTML page for the card. + +Since the cards are stored in the datastore we can access them via the `view/get` commands in the [card_cli](#card-cli) or by using the `get_cards` [function](../metaflow/plugins/cards/card_client.py). + +Metaflow ships with a [DefaultCard](#defaultcard) which visualizes artifacts, images, and `pandas.Dataframe`s. Metaflow also ships custom components like `Image`, `Table`, `Markdown` etc. These can be added to a card at `Task` runtime. Cards can also be edited from `@step` code using the [current.card](#editing-metaflowcard-from-@step-code) interface. `current.card` helps add `MetaflowCardComponent`s from `@step` code to a `MetaflowCard`. `current.card` offers methods like `current.card.append` or `current.card['myid']` to helps add components to a card. Since there can be many `@card`s over a `@step`, `@card` also comes with an `id` argument. The `id` argument helps disambigaute the card a component goes to when using `current.card`. For example, setting `@card(id='myid')` and calling `current.card['myid'].append(x)` will append `MetaflowCardComponent` `x` to the card with `id='myid'`. + +### `@card` decorator +The `@card` [decorator](../metaflow/plugins/cards/card_decorator.py) is implemented by inheriting the `StepDecorator`. The decorator can be placed over `@step` to create an HTML file visualizing information from the task. + +#### Parameters +- `type` `(str)` [Defaults to `default`]: The `type` of `MetaflowCard` to create. More details on `MetaflowCard`s is provided [later in this document](#metaflowcard). +- `options` `(dict)` : options to instantiate a `MetaflowCard`. `MetaflowCard`s will be instantiated with the `options` keyword argument. The value of this argument will be this dictionary. +- `timeout` `(int)` [Defaults to `45`]: Amount of time to wait before killing the card subprocess +- `save_errors` `(bool)` [Defaults to `True`]: If set to `True` then any failure on rendering a `MetaflowCard` will generate an `ErrorCard` instead with the full stack trace of the failure. + +#### Usage Semantics + +```python +from metaflow import FlowSpec,step,card + +class ModelTrainingFlow(FlowSpec): + + @step + def start(self): + self.next(self.train) + + @card( + type='default', + options={"only_repr":False}, + timeout=100, + save_errors = False + ) + @step + def train(self): + import random + import numpy as np + self.loss = np.random.randn(100,100)*100 + self.next(self.end) + + @step + def end(self): + print("Done Computation") + +if __name__ == "__main__": + ModelTrainingFlow() +``` + + + +### `CardDatastore` +The [CardDatastore](../metaflow/plugins/cards/card_datastore.py) is used by the the [card_cli](#card-cli) and the [metaflow card client](#access-cards-in-notebooks) (`get_cards`). It exposes methods to get metadata about a card and the paths to cards for a `pathspec`. + +### Card CLI +Methods exposed by the [card_cli](../metaflow/plugins/cards/.card_cli.py). : + +- `create` : Creates the card in the datastore for a `Task`. Adding a `--render-error-card` will render a `ErrorCard` upon failure to render the card of the selected `type`. If `--render-error-card` is not passed then the CLI will fail loudly with the exception. +```sh +# python myflow.py card create --type --timeout --options "{}" +python myflow.py card create 100/stepname/1000 --type default --timeout 10 --options '{"only_repr":false}' --render-error-card +``` + +- `view/get` : Calling the `view` CLI method will open the card associated for the pathspec in a browser. The `get` method gets the HTML for the card and prints it. You can call the command in the following way. Adding `--follow-resumed` as argument will retrieve the card for the origin resumed task. +```sh +# python myflow.py card view --hash --type +python myflow.py card view 100/stepname/1000 --hash ads34 --type default --follow-resumed +``` + +### Access cards in notebooks +Metaflow also exposes a `get_cards` client that helps resolve cards outside the CLI. Example usage is shown below : +```python +from metaflow import Task +from metaflow.cards import get_cards + +taskspec = 'MyFlow/1000/stepname/100' +task = Task(taskspec) +card_iterator = get_cards(task) # you can even call `get_cards(taskspec)` + +# view card in browser +card = card_iterator[0] +card.view() + +# Get HTML of card +html = card_iterator[0].get() +``` + +### `MetaflowCard` + +The [MetaflowCard](../metaflow/plugins/cards/card_modules/card.py) class is the base class to create custom cards. All subclasses require implementing the `render` function. The `render` function is expected to return a string. Below is an example snippet of usage : +```python +from metaflow.cards import MetaflowCard +# path to the custom html file which is a `mustache` template. +PATH_TO_CUSTOM_HTML = 'myhtml.html' + +class CustomCard(MetaflowCard): + type = "custom_card" + + def __init__(self, options={"no_header": True}, graph=None,components=[]): + super().__init__() + self._no_header = True + self._graph = graph + if "no_header" in options: + self._no_header = options["no_header"] + + def render(self, task): + pt = self._get_mustache() + data = dict( + graph = self._graph, + header = self._no_header + ) + html_template = None + with open(PATH_TO_CUSTOM_HTML) as f: + html_template = f.read() + return pt.render(html_template,data) +``` + +The class consists of the `_get_mustache` method that returns [chevron](https://github.com/noahmorrison/chevron) object ( a `mustache` based [templating engine](http://mustache.github.io/mustache.5.html) ). Using the `mustache` templating engine you can rewrite HTML template file. In the above example the `PATH_TO_CUSTOM_HTML` is the file that holds the `mustache` HTML template. +#### Attributes +- `type (str)` : The `type` of card. Needs to ensure correct resolution. +- `ALLOW_USER_COMPONENTS (bool)` : Setting this to `True` will make the a card be user editable. More information on user editable cards can be found [here](#editing-metaflowcard-from-@step-code). + +#### `__init__` Parameters +- `components` `(List[str])`: `components` is a list of `render`ed `MetaflowCardComponent`s created at `@step` runtime. These are passed to the `card create` cli command via a tempfile path in the `--component-file` argument. +- `graph` `(Dict[str,dict])`: The DAG associated to the flow. It is a dictionary of the form `stepname:step_attributes`. `step_attributes` is a dictionary of metadata about a step , `stepname` is the name of the step in the DAG. +- `options` `(dict)`: helps control the behavior of individual cards. + - For example, the `DefaultCard` supports `options` as dictionary of the form `{"only_repr":True}`. Here setting `only_repr` as `True` will ensure that all artifacts are serialized with `reprlib.repr` function instead of native object serialization. + + +### `MetaflowCardComponent` + +The `render` function of the `MetaflowCardComponent` class returns a `string` or `dict`. It can be called in the `MetaflowCard` class or passed during runtime execution. An example of using `MetaflowCardComponent` inside `MetaflowCard` can be seen below : +```python +from metaflow.cards import MetaflowCard,MetaflowCardComponent + +class Title(MetaflowCardComponent): + def __init__(self,text): + self._text = text + + def render(self): + return "

%s

"%self._text + +class Text(MetaflowCardComponent): + def __init__(self,text): + self._text = text + + def render(self): + return "

%s

"%self._text + +class CustomCard(MetaflowCard): + type = "custom_card" + + HTML = "{data}" + + def __init__(self, options={"no_header": True}, graph=None,components=[]): + super().__init__() + self._no_header = True + self._graph = graph + if "no_header" in options: + self._no_header = options["no_header"] + + def render(self, task): + pt = self._get_mustache() + data = '\n'.join([ + Title("Title 1").render(), + Text("some text comes here").render(), + Title("Title 2").render(), + Text("some text comes here again").render(), + ]) + data = dict( + data = data + ) + html_template = self.HTML + + return pt.render(html_template,data) +``` + +### `DefaultCard` +The [DefaultCard](../metaflow/plugins/cards/card_modules/basic.py) is a default card exposed by metaflow. This will be used when the `@card` decorator is called without any `type` argument or called with `type='default'` argument. It will also be the default card used with cli. The card uses a [HTML template](../metaflow/plugins/cards/card_modules/base.html) along with a [JS](../metaflow/plugins/cards/card_modules/main.js) and a [CSS](../metaflow/plugins/cards/card_modules/bundle.css) files. + +The [HTML](../metaflow/plugins/cards/card_modules/base.html) is a template which works with [JS](../metaflow/plugins/cards/card_modules/main.js) and [CSS](../metaflow/plugins/cards/card_modules/bundle.css). + +The JS and CSS are created after building the JS and CSS from the [cards-ui](../metaflow/plugins/cards/ui/README.md) directory. [cards-ui](../metaflow/plugins/cards/ui/README.md) consists of the JS app that generates the HTML view from a JSON object. + +### Default `MetaflowCardComponent` + +`DefaultCard`/`BlankCard` can be given `MetaflowCardComponent` from `@step` code. The following are the main `MetaflowCardComponent`s available via `metaflow.cards`. +- `Artifact` : A component to help log artifacts at task runtime. + - Example : `Artifact(some_variable,compress=True)` +- `Table` : A component to create a table in the card HTML. Consists of convenience methods : + - `Table.from_dataframe(df)` to make a table from a dataframe. +- `Image` : A component to create an image in the card HTML: + - `Image(bytearr,"my Image from bytes")`: to directly from `bytes` + - `Image.from_pil_image(pilimage,"From PIL Image")` : to create an image from a `PIL.Image` + - `Image.from_matplotlib(plot,"My matplotlib plot")` : to create an image from a plot +- `Error` : A wrapper subcomponent to display errors. Accepts an `exception` and a `title` as arguments. +- `Markdown` : A component that renders markdown in the HTML template +### Editing `MetaflowCard` from `@step` code +`MetaflowCard`s can be edited from `@step` code using the `current.card` interface. The `current.card` interface will only be active when a `@card` decorator is placed over a `@step`. To understand the workings of `current.card` consider the following snippet. +```python +@card(type='blank',id='a') +@card(type='default') +@step +def train(self): + from metaflow.cards import Markdown + from metaflow import current + current.card.append(Markdown('# This is present in the blank card with id "a"')) + current.card['a'].append(Markdown('# This is present in the default card')) + self.t = dict( + hi = 1, + hello = 2 + ) + self.next(self.end) +``` +In the above scenario there are two `@card` decorators which are being customized by `current.card`. The `current.card.append`/ `current.card['a'].append` methods only accepts objects which are subclasses of `MetaflowCardComponent`. The `current.card.append`/ `current.card['a'].append` methods only add a component to **one** card. Since there can be many cards for a `@step`, a **default editabled card** is resolved to disambiguate which card has access to the `append`/`extend` methods within the `@step`. A default editable card is a card that will have access to the `current.card.append`/`current.card.extend` methods. `current.card` resolve the default editable card before a `@step` code gets executed. It sets the default editable card once the last `@card` decorator calls the `task_pre_step` callback. In the above case, `current.card.append` will add a `Markdown` component to the card of type `default`. `current.card['a'].append` will add the `Markdown` to the `blank` card whose `id` is `a`. A `MetaflowCard` can be user editable, if `ALLOW_USER_COMPONENTS` is set to `True`. Since cards can be of many types, **some cards can also be non editable by users** (Cards with `ALLOW_USER_COMPONENTS=False`). Those cards won't be eligible to access the `current.card.append`. A non user editable card can be edited through expicitly setting an `id` and accessing it via `current.card['myid'].append` or by looking it up by its type via `current.card.get(type=’pytorch’)`. + +#### `current.card` (`CardComponentCollector`) + +The `CardComponentCollector` is the object responsible for resolving a `MetaflowCardComponent` to the card referenced in the `@card` decorator. + +Since there can be many cards, `CardComponentCollector` has a `_finalize` function. The `_finalize` function is called once the **last** `@card` decorator calls `task_pre_step`. The `_finalize` function will try to find the **default editable card** from all the `@card` decorators on the `@step`. The default editable card is the card that can access the `current.card.append`/`current.card.extend` methods. If there are multiple editable cards with no `id` then `current.card` will throw warnings when users call `current.card.append`. This is done because `current.card` cannot resolve which card the component belongs. + +The `@card` decorator also exposes another argument called `customize=True`. **Only one `@card` decorator over a `@step` can have `customize=True`**. Since cards can also be added from CLI when running a flow, adding `@card(customize=True)` will set **that particular card** from the decorator as default editable. This means that `current.card.append` will append to the card belonging to `@card` with `customize=True`. If there is more than one `@card` decorator with `customize=True` then `current.card` will throw warnings that `append` won't work. + +One important feature of the `current.card` object is that it will not fail. Even when users try to access `current.card.append` with multiple editable cards, we throw warnings but don't fail. `current.card` will also not fail when a user tries to access a card of a non-existing id via `current.card['mycard']`. Since `current.card['mycard']` gives reference to a `list` of `MetaflowCardComponent`s, `current.card` will return a non-referenced `list` when users try to access the dictionary inteface with a non existing id (`current.card['my_non_existant_card']`). + +Once the `@step` completes execution, every `@card` decorator will call `current.card._serialize` (`CardComponentCollector._serialize`) to get a JSON serializable list of `str`/`dict` objects. The `_serialize` function internally calls all [component's](#metaflowcardcomponent) `render` function. This list is `json.dump`ed to a `tempfile` and passed to the `card create` subprocess where the `MetaflowCard` can use them in the final output. + +### Creating Custom Installable Cards +Custom cards can be installed with the help of the `metaflow_extensions` namespace package. Every `metaflow_extensions` module having custom cards should follow the below directory structure. . You can see an example cookie-cutter card over [here](https://github.com/outerbounds/metaflow-card-html). +``` +your_package/ # the name of this dir doesn't matter +├ setup.py +├ metaflow_extensions/ +│ └ organizationA/ # NO __init__.py file, This is a namespace package. +│ └ plugins/ # NO __init__.py file, This is a namespace package. +│ └ cards/ # NO __init__.py file, This is a namespace package. +│ └ my_card_module/ # Name of card_module +│ └ __init__.py. # This is the __init__.py is required to recoginize `my_card_module` as a package +│ └ somerandomfile.py. # Some file as a part of the package. +. +``` + +The `__init__.py` of the `metaflow_extensions.organizationA.plugins.cards.my_card_module`, requires a `CARDS` attribute which needs to be a `list` of objects inheriting `MetaflowCard` class. For Example, in the below `__init__.py` file exposes a `MetaflowCard` of `type` "y_card2". + +```python +from metaflow.cards import MetaflowCard + +class YCard(MetaflowCard): + type = "y_card2" + + ALLOW_USER_COMPONENTS = True + + def __init__(self, options={}, components=[], graph=None): + self._components = components + + def render(self, task): + return "I am Y card %s" % '\n'.join([comp for comp in self._components]) + +CARDS = [YCard] +``` + +Having this `metaflow_extensions` module present in the PYTHONPATH can also work. Custom cards can also be created by reusing components provided by metaflow. For Example : +```python +from metaflow.cards import BlankCard +from metaflow.cards import Artifact,Table + +class MyCustomCard(BlankCard): + + type = 'my_custom_card' + + def render(self, task): + art_com [ + Table( + [[Artifact(k.data,k.id)] for k in task] + ).render() + ] + return super().render(task,components=[art_com]) + +CARDS = [MyCustomCard] +``` \ No newline at end of file diff --git a/metaflow/cards.py b/metaflow/cards.py index 3bc4eba8321..16893c7e164 100644 --- a/metaflow/cards.py +++ b/metaflow/cards.py @@ -1,21 +1,19 @@ from metaflow.plugins.cards.card_client import get_cards from metaflow.plugins.cards.card_modules.card import MetaflowCardComponent, MetaflowCard +from metaflow.plugins.cards.card_modules.components import ( + Artifact, + Table, + Image, + Error, + Markdown, +) from metaflow.plugins.cards.card_modules.basic import ( DefaultCard, - TitleComponent, - SubTitleComponent, - SectionComponent, - ImageComponent, - BarChartComponent, - LineChartComponent, - TableComponent, - DagComponent, PageComponent, - ArtifactsComponent, RENDER_TEMPLATE_PATH, TaskToDict, DefaultComponent, - ChartComponent, TaskInfoComponent, ErrorCard, + BlankCard, ) diff --git a/metaflow/graph.py b/metaflow/graph.py index 06ab8620666..9651f34bb2f 100644 --- a/metaflow/graph.py +++ b/metaflow/graph.py @@ -298,6 +298,8 @@ def node_to_dict(name, node): d["foreach_artifact"] = node.foreach_param elif d["type"] == "split-parallel": d["num_parallel"] = node.num_parallel + if node.matching_join: + d["matching_join"] = node.matching_join return d def populate_block(start_name, end_name): diff --git a/metaflow/metaflow_config.py b/metaflow/metaflow_config.py index 119ee169254..c31869d570a 100644 --- a/metaflow/metaflow_config.py +++ b/metaflow/metaflow_config.py @@ -64,26 +64,29 @@ def from_conf(name, default=None): DATATOOLS_SUFFIX = from_conf("METAFLOW_DATATOOLS_SUFFIX", "data") DATATOOLS_S3ROOT = from_conf( "METAFLOW_DATATOOLS_S3ROOT", - "%s/%s" % (from_conf("METAFLOW_DATASTORE_SYSROOT_S3"), DATATOOLS_SUFFIX) + os.path.join(from_conf("METAFLOW_DATASTORE_SYSROOT_S3"), DATATOOLS_SUFFIX) if from_conf("METAFLOW_DATASTORE_SYSROOT_S3") else None, ) # Local datatools root location DATATOOLS_LOCALROOT = from_conf( "METAFLOW_DATATOOLS_LOCALROOT", - "%s/%s" % (from_conf("METAFLOW_DATASTORE_SYSROOT_LOCAL"), DATATOOLS_SUFFIX) + os.path.join(from_conf("METAFLOW_DATASTORE_SYSROOT_LOCAL"), DATATOOLS_SUFFIX) if from_conf("METAFLOW_DATASTORE_SYSROOT_LOCAL") else None, ) +# Cards related config variables DATASTORE_CARD_SUFFIX = "mf.cards" DATASTORE_CARD_LOCALROOT = from_conf("METAFLOW_CARD_LOCALROOT") DATASTORE_CARD_S3ROOT = from_conf( "METAFLOW_CARD_S3ROOT", - "%s/%s" % (from_conf("METAFLOW_DATASTORE_SYSROOT_S3"), DATASTORE_CARD_SUFFIX) + os.path.join(from_conf("METAFLOW_DATASTORE_SYSROOT_S3"), DATASTORE_CARD_SUFFIX) if from_conf("METAFLOW_DATASTORE_SYSROOT_S3") else None, ) +CARD_NO_WARNING = from_conf("METAFLOW_CARD_NO_WARNING", False) + # S3 endpoint url S3_ENDPOINT_URL = from_conf("METAFLOW_S3_ENDPOINT_URL", None) S3_VERIFY_CERTIFICATE = from_conf("METAFLOW_S3_VERIFY_CERTIFICATE", None) diff --git a/metaflow/plugins/__init__.py b/metaflow/plugins/__init__.py index 6a9d4a7ea66..5892d32e99b 100644 --- a/metaflow/plugins/__init__.py +++ b/metaflow/plugins/__init__.py @@ -150,17 +150,32 @@ def get_plugin_cli(): _merge_lists(FLOW_DECORATORS, _ext_plugins["FLOW_DECORATORS"], "name") # Cards -from .cards.card_modules.basic import DefaultCard, TaskSpecCard, ErrorCard -from .cards.card_modules.test_cards import TestErrorCard, TestTimeoutCard, TestMockCard +from .cards.card_modules.basic import DefaultCard, TaskSpecCard, ErrorCard, BlankCard +from .cards.card_modules.test_cards import ( + TestErrorCard, + TestTimeoutCard, + TestMockCard, + TestPathSpecCard, + TestEditableCard, + TestEditableCard2, + TestNonEditableCard, +) +from .cards.card_modules import MF_EXTERNAL_CARDS CARDS = [ DefaultCard, TaskSpecCard, ErrorCard, + BlankCard, TestErrorCard, TestTimeoutCard, TestMockCard, -] + TestPathSpecCard, + TestEditableCard, + TestEditableCard2, + TestNonEditableCard, + BlankCard, +] + MF_EXTERNAL_CARDS # Sidecars from ..mflog.save_logs_periodically import SaveLogsPeriodicallySidecar from metaflow.metadata.heartbeat import MetadataHeartBeat diff --git a/metaflow/plugins/cards/card_cli.py b/metaflow/plugins/cards/card_cli.py index f83019cd8e7..ae1c2a98aaa 100644 --- a/metaflow/plugins/cards/card_cli.py +++ b/metaflow/plugins/cards/card_cli.py @@ -5,6 +5,7 @@ import re import click import os +import json import signal import random from contextlib import contextmanager @@ -16,22 +17,12 @@ IncorrectCardArgsException, UnrenderableCardException, CardNotPresentException, + TaskNotFoundException, ) from .card_resolver import resolve_paths_from_task, resumed_info -# FIXME : Import the changes from Netflix/metaflow#833 for Graph -def serialize_flowgraph(flowgraph): - graph_dict = {} - for node in flowgraph: - graph_dict[node.name] = { - "type": node.type, - "box_next": node.type not in ("linear", "join"), - "box_ends": node.matching_join, - "next": node.out_funcs, - "doc": node.doc, - } - return graph_dict +id_func = id def open_in_browser(card_path): @@ -39,72 +30,116 @@ def open_in_browser(card_path): webbrowser.open(url) +def resolve_task_from_pathspec(flow_name, pathspec): + """ + resolves a task object for the pathspec query on the CLI. + Args: + flow_name : (str) : name of flow + pathspec (str) : can be `stepname` / `runid/stepname` / `runid/stepname/taskid` + + Returns: + metaflow.Task | None + """ + from metaflow import Flow, Step, Task + from metaflow.exception import MetaflowNotFound + + # since pathspec can have many variations. + pthsplits = pathspec.split("/") + task = None + run_id = None + resolving_from = "task_pathspec" + if len(pthsplits) == 1: + # This means stepname + resolving_from = "stepname" + latest_run = Flow(flow_name).latest_run + if latest_run is not None: + run_id = latest_run.pathspec + try: + task = latest_run[pathspec].task + except KeyError: + pass + elif len(pthsplits) == 2: + # This means runid/stepname + namespace(None) + resolving_from = "step_pathspec" + try: + task = Step("/".join([flow_name, pathspec])).task + except MetaflowNotFound: + pass + elif len(pthsplits) == 3: + # this means runid/stepname/taskid + namespace(None) + resolving_from = "task_pathspec" + try: + task = Task("/".join([flow_name, pathspec])) + except MetaflowNotFound: + pass + else: + # raise exception for invalid pathspec format + raise CommandException( + msg="The PATHSPEC argument should be of the form 'stepname' Or '/' Or '//'" + ) + + if task is None: + # raise Exception that task could not be resolved for the query. + raise TaskNotFoundException(pathspec, resolving_from, run_id=run_id) + + return task + + def resolve_card( ctx, pathspec, follow_resumed=True, hash=None, type=None, + card_id=None, + no_echo=False, ): - """Resolves the card path based on the arguments provided. We allow identifier to be a pathspec or a id of card. + """Resolves the card path for a query. Args: ctx: click context object - pathspec: pathspec + pathspec: pathspec can be `stepname` or `runid/stepname` or `runid/stepname/taskid` hash (optional): This is to specifically resolve the card via the hash. This is useful when there may be many card with same id or type for a pathspec. type : type of card + card_id : `id` given to card + no_echo : if set to `True` then supress logs about pathspec resolution. Raises: CardNotPresentException: No card could be found for the pathspec Returns: (card_paths, card_datastore, taskpathspec) : Tuple[List[str], CardDatastore, str] """ - if len(pathspec.split("/")) != 3: - raise CommandException( - msg="Expecting pathspec of form //" - ) - flow_name = ctx.obj.flow.name - run_id, step_name, task_id = None, None, None - # what should be the args we expose - run_id, step_name, task_id = pathspec.split("/") - pathspec = "/".join([flow_name, pathspec]) - # we set namespace to be none to avoid namespace mismatch error. - namespace(None) - task = Task(pathspec) - print_str = "Resolving card: %s" % pathspec + task = resolve_task_from_pathspec(flow_name, pathspec) + card_pathspec = task.pathspec + print_str = "Resolving card: %s" % card_pathspec if follow_resumed: origin_taskpathspec = resumed_info(task) if origin_taskpathspec: - pathspec = origin_taskpathspec - ctx.obj.echo( - "Resolving card resumed from: %s" % origin_taskpathspec, - fg="green", - ) - else: - ctx.obj.echo(print_str, fg="green") - else: + card_pathspec = origin_taskpathspec + print_str = "Resolving card resumed from: %s" % origin_taskpathspec + + if not no_echo: ctx.obj.echo(print_str, fg="green") # to resolve card_id we first check if the identifier is a pathspec and if it is then we check if the `id` is set or not to resolve card_id # todo : Fix this with `coalesce function` card_paths_found, card_datastore = resolve_paths_from_task( ctx.obj.flow_datastore, - pathspec=pathspec, + pathspec=card_pathspec, type=type, hash=hash, + card_id=card_id, ) if len(card_paths_found) == 0: # If there are no files found on the Path then raise an error of raise CardNotPresentException( - flow_name, - run_id, - step_name, - card_hash=hash, - card_type=type, + card_pathspec, card_hash=hash, card_type=type, card_id=card_id ) - return card_paths_found, card_datastore, pathspec + return card_paths_found, card_datastore, card_pathspec @contextmanager @@ -128,32 +163,67 @@ def raise_timeout(signum, frame): raise TimeoutError -def list_available_cards(ctx, path_spec, card_paths, card_datastore, command="view"): +def list_available_cards( + ctx, + pathspec, + card_paths, + card_datastore, + command="view", + show_list_as_json=False, + list_many=False, +): + # pathspec is full pathspec. # todo : create nice response messages on the CLI for cards which were found. scriptname = ctx.obj.flow.script_name path_tuples = card_datastore.get_card_names(card_paths) - ctx.obj.echo( - "\nFound %d card matching for your query..." % len(path_tuples), fg="green" - ) - task_pathspec = "/".join(path_spec.split("/")[1:]) + if show_list_as_json: + json_arr = [ + dict(id=tup.id, hash=tup.hash, type=tup.type, filename=tup.filename) + for tup in path_tuples + ] + if not list_many: + print(json.dumps(dict(pathspec=pathspec, cards=json_arr), indent=4)) + return dict(pathspec=pathspec, cards=json_arr) + + if list_many: + ctx.obj.echo("\tTask: %s" % pathspec.split("/")[-1], fg="green") + else: + ctx.obj.echo( + "Found %d card matching for your query..." % len(path_tuples), fg="green" + ) + task_pathspec = "/".join(pathspec.split("/")[1:]) card_list = [] - for path_tuple in path_tuples: - card_name = "%s-%s" % (path_tuple.type, path_tuple.hash) - card_list.append(card_name) + for path_tuple, file_path in zip(path_tuples, card_paths): + full_pth = card_datastore.create_full_path(file_path) + cpr = """ + Card Id: %s + Card Type: %s + Card Hash: %s + Card Path: %s + """ % ( + path_tuple.id, + path_tuple.type, + path_tuple.hash, + full_pth, + ) + card_list.append(cpr) random_idx = 0 if len(path_tuples) == 1 else random.randint(0, len(path_tuples) - 1) - _, randhash = path_tuples[random_idx] - ctx.obj.echo("\n\t".join([""] + card_list), fg="blue") - ctx.obj.echo( - "\n\tExample access from CLI via: \n\t %s\n" - % make_command( - scriptname, - task_pathspec, - command=command, - hash=randhash[:NUM_SHORT_HASH_CHARS], - ), - fg="yellow", - ) + _, randhash, _, file_name = path_tuples[random_idx] + join_char = "\n\t" + ctx.obj.echo(join_char.join([""] + card_list) + "\n", fg="blue") + + if command is not None: + ctx.obj.echo( + "\n\tExample access from CLI via: \n\t %s\n" + % make_command( + scriptname, + task_pathspec, + command=command, + hash=randhash[:NUM_SHORT_HASH_CHARS], + ), + fg="yellow", + ) def make_command( @@ -176,6 +246,62 @@ def make_command( ) +def list_many_cards( + ctx, + type=None, + hash=None, + card_id=None, + follow_resumed=None, + as_json=None, +): + from metaflow import Flow + + flow = Flow(ctx.obj.flow.name) + run = flow.latest_run + cards_found = 0 + if not as_json: + pass + ctx.obj.echo("Listing cards for run %s" % run.pathspec, fg="green") + js_list = [] + for step in run: + step_str_printed = False # variable to control printing stepname once. + for task in step: + try: + available_card_paths, card_datastore, pathspec = resolve_card( + ctx, + "/".join(task.pathspec.split("/")[1:]), + type=type, + hash=hash, + card_id=card_id, + follow_resumed=follow_resumed, + no_echo=True, + ) + if not step_str_printed and not as_json: + ctx.obj.echo("Step : %s" % step.id, fg="green") + step_str_printed = True + + js_resp = list_available_cards( + ctx, + pathspec, + available_card_paths, + card_datastore, + command=None, + show_list_as_json=as_json, + list_many=True, + ) + if as_json: + js_list.append(js_resp) + cards_found += 1 + except CardNotPresentException: + pass + if cards_found == 0: + raise CardNotPresentException( + run.pathspec, card_hash=hash, card_type=type, card_id=card_id + ) + if as_json: + print(json.dumps(js_list, indent=4)) + + @click.group() def cli(): pass @@ -212,7 +338,14 @@ def card_read_options_and_arguments(func): default=None, show_default=True, type=str, - help="Type of card being created", + help="Type of card", + ) + @click.option( + "--id", + default=None, + show_default=True, + type=str, + help="Id of the card", ) @click.option( "--follow-resumed/--no-follow-resumed", @@ -266,6 +399,20 @@ def render_card(mf_card, task, timeout_value=None): is_flag=True, help="Upon failing to render a card, render a card holding the stack trace", ) +@click.option( + "--component-file", + default=None, + show_default=True, + type=str, + help="JSON File with Pre-rendered components.(internal)", +) +@click.option( + "--id", + default=None, + show_default=True, + type=str, + help="ID of the card", +) @click.pass_context def create( ctx, @@ -273,9 +420,11 @@ def create( type=None, options=None, timeout=None, + component_file=None, render_error_card=False, + id=None, ): - + card_id = id rendered_info = None # Variable holding all the information which will be rendered error_stack_trace = None # Variable which will keep a track of error @@ -286,12 +435,17 @@ def create( flowname = ctx.obj.flow.name full_pathspec = "/".join([flowname, pathspec]) - # todo : Import the changes from Netflix/metaflow#833 for Graph - graph_dict = serialize_flowgraph(ctx.obj.graph) + graph_dict, _ = ctx.obj.graph.output_steps() + + # Components are rendered in a Step and added via `current.card.append` are added here. + component_arr = [] + if component_file is not None: + with open(component_file, "r") as f: + component_arr = json.load(f) task = Task(full_pathspec) from metaflow.plugins import CARDS - from metaflow.plugins.cards.exception import CARD_ID_PATTERN + from metaflow.plugins.cards.exception import CARD_ID_PATTERN, TYPE_CHECK_REGEX from metaflow.cards import ErrorCard error_card = ErrorCard @@ -316,7 +470,9 @@ def create( # then check for render_error_card and accordingly # store the exception as a string or raise the exception try: - mf_card = filtered_card(options=options, components=[], graph=graph_dict) + mf_card = filtered_card( + options=options, components=component_arr, graph=graph_dict + ) except TypeError as e: if render_error_card: mf_card = None @@ -348,8 +504,17 @@ def create( else: save_type = "error" + # If card_id is doesn't match regex pattern then we will set it as None + if card_id is not None and re.match(CARD_ID_PATTERN, card_id) is None: + ctx.obj.echo( + "`--id=%s` doesn't match REGEX pattern. `--id` will be set to `None`. Please create `--id` of pattern %s." + % (card_id, TYPE_CHECK_REGEX), + fg="red", + ) + card_id = None + if rendered_info is not None: - card_info = card_datastore.save_card(save_type, rendered_info) + card_info = card_datastore.save_card(save_type, rendered_info, card_id=card_id) ctx.obj.echo( "Card created with type: %s and hash: %s" % (card_info.type, card_info.hash[:NUM_SHORT_HASH_CHARS]), @@ -366,54 +531,132 @@ def view( pathspec, hash=None, type=None, + id=None, follow_resumed=False, ): """ View the HTML card in browser based on the pathspec.\n - The Task pathspec is of the form:\n - //\n + The pathspec can be of the form:\n + - \n + - /\n + - //\n """ + card_id = id available_card_paths, card_datastore, pathspec = resolve_card( ctx, pathspec, type=type, hash=hash, + card_id=card_id, follow_resumed=follow_resumed, ) if len(available_card_paths) == 1: open_in_browser(card_datastore.cache_locally(available_card_paths[0])) else: list_available_cards( - ctx, pathspec, available_card_paths, card_datastore, command="view" + ctx, + pathspec, + available_card_paths, + card_datastore, + command="view", ) @card.command() @click.argument("pathspec") +@click.argument("path", required=False) @card_read_options_and_arguments @click.pass_context def get( ctx, pathspec, + path, hash=None, type=None, + id=None, follow_resumed=False, ): """ Get the HTML string of the card based on pathspec.\n - The Task pathspec is of the form:\n - //\n + The pathspec can be of the form:\n + - \n + - /\n + - //\n + + Save the card by adding the `path` argument. + ``` + python myflow.py card get start a.html --type default + ``` """ + card_id = id available_card_paths, card_datastore, pathspec = resolve_card( ctx, pathspec, type=type, hash=hash, + card_id=card_id, follow_resumed=follow_resumed, ) if len(available_card_paths) == 1: + if path is not None: + card_datastore.cache_locally(available_card_paths[0], path) + return print(card_datastore.get_card_html(available_card_paths[0])) else: list_available_cards( - ctx, pathspec, available_card_paths, card_datastore, command="get" + ctx, + pathspec, + available_card_paths, + card_datastore, + command="get", + ) + + +@card.command() +@click.argument("pathspec", required=False) +@card_read_options_and_arguments +@click.option( + "--as-json", + default=False, + is_flag=True, + help="Print all available cards as a JSON object", +) +@click.pass_context +def list( + ctx, + pathspec=None, + hash=None, + type=None, + id=None, + follow_resumed=False, + as_json=False, +): + card_id = id + if pathspec is None: + list_many_cards( + ctx, + type=type, + hash=hash, + card_id=card_id, + follow_resumed=follow_resumed, + as_json=as_json, ) + return + + available_card_paths, card_datastore, pathspec = resolve_card( + ctx, + pathspec, + type=type, + hash=hash, + card_id=card_id, + follow_resumed=follow_resumed, + no_echo=as_json, + ) + list_available_cards( + ctx, + pathspec, + available_card_paths, + card_datastore, + command=None, + show_list_as_json=as_json, + ) diff --git a/metaflow/plugins/cards/card_client.py b/metaflow/plugins/cards/card_client.py index 3fe5418f3c4..ec8c3ef046f 100644 --- a/metaflow/plugins/cards/card_client.py +++ b/metaflow/plugins/cards/card_client.py @@ -9,8 +9,10 @@ ) import os import tempfile +import uuid _TYPE = type +_ID_FUNC = id class Card: @@ -36,6 +38,8 @@ def __init__( card_ds, type, path, + hash, + id=None, html=None, created_on=None, from_resumed=False, @@ -46,8 +50,10 @@ def __init__( self._html = html self._created_on = created_on self._card_ds = card_ds + self._card_id = id # public attributes + self.hash = hash self.type = type self.from_resumed = from_resumed self.origin_pathspec = origin_pathspec @@ -65,6 +71,10 @@ def get(self): def path(self): return self._path + @property + def id(self): + return self._card_id + def __str__(self): return "" % self._path @@ -79,7 +89,17 @@ def view(self): webbrowser.open(url) def _repr_html_(self): - return self.get() + main_html = [] + container_id = uuid.uuid4() + main_html.append( + "" + % container_id + ) + main_html.append( + "
%s
" + % (container_id, self.get()) + ) + return "\n".join(main_html) class CardContainer: @@ -124,6 +144,8 @@ def _get_card(self, index): self._card_ds, card_info.type, path, + card_info.hash, + id=card_info.id, html=None, created_on=None, ) @@ -136,11 +158,19 @@ def _repr_html_(self): for idx, _ in enumerate(self._card_paths): card = self._get_card(idx) main_html.append(self._make_heading(card.type)) - main_html.append("
%s
" % card.get()) + container_id = uuid.uuid4() + main_html.append( + "" + % container_id + ) + main_html.append( + "
%s
" + % (container_id, card.get()) + ) return "\n".join(main_html) -def get_cards(task, type=None, follow_resumed=True): +def get_cards(task, id=None, type=None, follow_resumed=True): """ Get cards related to a Metaflow `Task` @@ -155,6 +185,7 @@ def get_cards(task, type=None, follow_resumed=True): from metaflow.client import Task from metaflow import namespace + card_id = id if isinstance(task, str): task_str = task if len(task_str.split("/")) != 4: @@ -173,9 +204,7 @@ def get_cards(task, type=None, follow_resumed=True): task = Task(origin_taskpathspec) card_paths, card_ds = resolve_paths_from_task( - _get_flow_datastore(task), - pathspec=task.pathspec, - type=type, + _get_flow_datastore(task), pathspec=task.pathspec, type=type, card_id=card_id ) return CardContainer( card_paths, diff --git a/metaflow/plugins/cards/card_datastore.py b/metaflow/plugins/cards/card_datastore.py index 69c479879e8..a31c26425ad 100644 --- a/metaflow/plugins/cards/card_datastore.py +++ b/metaflow/plugins/cards/card_datastore.py @@ -21,7 +21,7 @@ TEMP_DIR_NAME = "metaflow_card_cache" NUM_SHORT_HASH_CHARS = 5 -CardInfo = namedtuple("CardInfo", ["type", "hash"]) +CardInfo = namedtuple("CardInfo", ["type", "hash", "id", "filename"]) def path_spec_resolver(pathspec): @@ -76,11 +76,13 @@ def __init__(self, flow_datastore, pathspec=None): self._temp_card_save_path = self._get_card_path(base_pth=TEMP_DIR_NAME) @classmethod - def get_card_location(cls, base_path, card_name, card_html): - return os.path.join( - base_path, - "%s-%s.html" % (card_name, sha1(bytes(card_html, "utf-8")).hexdigest()), - ) + def get_card_location(cls, base_path, card_name, card_html, card_id=None): + chash = sha1(bytes(card_html, "utf-8")).hexdigest() + if card_id is None: + card_file_name = "%s-%s.html" % (card_name, chash) + else: + card_file_name = "%s-%s-%s.html" % (card_name, card_id, chash) + return os.path.join(base_path, card_file_name) def _make_path(self, base_pth, pathspec=None): sysroot = base_pth @@ -124,35 +126,40 @@ def card_info_from_path(path): """ card_file_name = path.split("/")[-1] file_split = card_file_name.split("-") - if len(file_split) != 2: + + if len(file_split) not in [2, 3]: raise Exception( - "Invalid card file name %s. Card file names should be of form TYPE-HASH.html" + "Invalid card file name %s. Card file names should be of form TYPE-HASH.html or TYPE-ID-HASH.html" % card_file_name ) - card_type, card_hash = None, None - card_type, card_hash = file_split + card_type, card_hash, card_id = None, None, None + + if len(file_split) == 2: + card_type, card_hash = file_split + else: + card_type, card_id, card_hash = file_split + card_hash = card_hash.split(".html")[0] - return CardInfo(card_type, card_hash) + return CardInfo(card_type, card_hash, card_id, card_file_name) - def save_card(self, card_type, card_html, overwrite=True): + def save_card(self, card_type, card_html, card_id=None, overwrite=True): card_file_name = card_type card_path = self.get_card_location( - self._get_card_path(), card_file_name, card_html + self._get_card_path(), card_file_name, card_html, card_id=card_id ) self._backend.save_bytes( [(card_path, BytesIO(bytes(card_html, "utf-8")))], overwrite=overwrite ) return self.card_info_from_path(card_path) - def _list_card_paths(self, card_type=None, card_hash=None): + def _list_card_paths(self, card_type=None, card_hash=None, card_id=None): card_path = self._get_card_path() + card_paths = self._backend.list_content([card_path]) if len(card_paths) == 0: # If there are no files found on the Path then raise an error of raise CardNotPresentException( - self._flow_name, - self._run_id, - self._step_name, + self._pathspec, card_hash=card_hash, card_type=card_type, ) @@ -163,17 +170,19 @@ def _list_card_paths(self, card_type=None, card_hash=None): if card_type is not None and card_info.type != card_type: continue elif card_hash is not None: - if ( - card_info.hash != card_hash - and card_hash != card_info.hash[:NUM_SHORT_HASH_CHARS] - ): + if not card_info.hash.startswith(card_hash): continue + elif card_id is not None and card_info.id != card_id: + continue if task_card_path.is_file: cards_found.append(card_path) return cards_found + def create_full_path(self, card_path): + return os.path.join(self._backend.datastore_root, card_path) + def get_card_names(self, card_paths): return [self.card_info_from_path(path) for path in card_paths] @@ -184,21 +193,32 @@ def get_card_html(self, path): with open(path, "r") as f: return f.read() - def cache_locally(self, path): + def cache_locally(self, path, save_path=None): + """ + Saves the data present in the `path` the `metaflow_card_cache` directory or to the `save_path`. + """ # todo : replace this function with the FileCache - if not is_file_present(self._temp_card_save_path): - LocalStorage._makedirs(self._temp_card_save_path) + if save_path is None: + if not is_file_present(self._temp_card_save_path): + LocalStorage._makedirs(self._temp_card_save_path) + else: + save_dir = os.path.dirname(save_path) + if save_dir != "" and not is_file_present(save_dir): + LocalStorage._makedirs(os.path.dirname(save_path)) + with self._backend.load_bytes([path]) as get_results: for key, path, meta in get_results: if path is not None: main_path = path - file_name = key.split("/")[-1] - main_path = os.path.join(self._temp_card_save_path, file_name) + if save_path is None: + file_name = key.split("/")[-1] + main_path = os.path.join(self._temp_card_save_path, file_name) + else: + main_path = save_path shutil.copy(path, main_path) return main_path - def extract_card_paths(self, card_type=None, card_hash=None): + def extract_card_paths(self, card_type=None, card_hash=None, card_id=None): return self._list_card_paths( - card_type=card_type, - card_hash=card_hash, + card_type=card_type, card_hash=card_hash, card_id=card_id ) diff --git a/metaflow/plugins/cards/card_decorator.py b/metaflow/plugins/cards/card_decorator.py index 436864d3535..fa02905eed3 100644 --- a/metaflow/plugins/cards/card_decorator.py +++ b/metaflow/plugins/cards/card_decorator.py @@ -1,17 +1,25 @@ import subprocess import os +import tempfile import sys import json from metaflow.decorators import StepDecorator, flow_decorators from metaflow.current import current from metaflow.util import to_unicode +from .component_serializer import CardComponentCollector, get_card_class +from .card_modules import _get_external_card_package_paths + # from metaflow import get_metadata import re -CARD_ID_PATTERN = re.compile( - "^[a-zA-Z0-9_]+$", -) +from .exception import CARD_ID_PATTERN, TYPE_CHECK_REGEX + + +def warning_message(message, logger=None, ts=False): + msg = "[@card WARNING] %s" % message + if logger: + logger(msg, timestamp=ts, bad=True) class CardDecorator(StepDecorator): @@ -21,17 +29,27 @@ class CardDecorator(StepDecorator): "options": {}, "scope": "task", "timeout": 45, + "id": None, "save_errors": True, + "customize": False, } allow_multiple = True + total_decos_on_step = {} + + step_counter = 0 + + _called_once = {} + def __init__(self, *args, **kwargs): super(CardDecorator, self).__init__(*args, **kwargs) self._task_datastore = None self._environment = None self._metadata = None self._logger = None - # todo : first allow multiple decorators with a step + self._is_editable = False + self._card_uuid = None + self._user_set_card_id = None def add_to_package(self): return list(self._load_card_package()) @@ -42,13 +60,26 @@ def _load_card_package(self): card_modules_root = os.path.dirname(card_modules.__file__) - for path_tuple in self._walk(card_modules_root): + for path_tuple in self._walk( + card_modules_root, filter_extensions=[".html", ".js", ".css"] + ): file_path, arcname = path_tuple yield (file_path, os.path.join("metaflow", "plugins", "cards", arcname)) - def _walk(self, root): + external_card_pth_generator = _get_external_card_package_paths() + if external_card_pth_generator is None: + return + for module_pth, parent_arcname in external_card_pth_generator: + # `_get_card_package_paths` is a generator which yields + # path to the module and its relative arcname in the metaflow-extensions package. + for file_pth, rel_path in self._walk(module_pth, prefix_root=True): + arcname = os.path.join(parent_arcname, rel_path) + yield (file_pth, arcname) + + def _walk(self, root, filter_extensions=[], prefix_root=False): root = to_unicode(root) # handle files/folder with non ascii chars - prefixlen = len("%s/" % os.path.dirname(root)) + prfx = "%s/" % (root if prefix_root else os.path.dirname(root)) + prefixlen = len(prfx) for path, dirs, files in os.walk(root): for fname in files: # ignoring filesnames which are hidden; @@ -56,17 +87,36 @@ def _walk(self, root): if fname[0] == ".": continue - # TODO: This prevents redundant packaging of .py files for the - # default card. We should fix this logic to allow .py files to - # be included for custom cards. - if any(fname.endswith(s) for s in [".html", ".js", ".css"]): - p = os.path.join(path, fname) - yield p, p[prefixlen:] + if len(filter_extensions) > 0 and not any( + fname.endswith(s) for s in filter_extensions + ): + continue + p = os.path.join(path, fname) + yield p, p[prefixlen:] + + def _is_event_registered(self, evt_name): + return evt_name in self._called_once + + @classmethod + def _register_event(cls, evt_name): + if evt_name not in cls._called_once: + cls._called_once[evt_name] = True + + @classmethod + def _set_card_counts_per_step(cls, step_name, total_count): + cls.total_decos_on_step[step_name] = total_count + + @classmethod + def _increment_step_counter(cls): + cls.step_counter += 1 def step_init( self, flow, graph, step_name, decorators, environment, flow_datastore, logger ): + self._flow_datastore = flow_datastore + self._environment = environment + self._logger = logger self.card_options = None # Populate the defaults which may be missing. @@ -84,9 +134,19 @@ def step_init( else: self.card_options = self.attributes["options"] - self._flow_datastore = flow_datastore - self._environment = environment - self._logger = logger + evt_name = "step-init" + # `'%s-%s'%(evt_name,step_name)` ensures that we capture this once per @card per @step. + # Since there can be many steps checking if event is registered for `evt_name` will only make it check it once for all steps. + # Hence we have `_is_event_registered('%s-%s'%(evt_name,step_name))` + if not self._is_event_registered("%s-%s" % (evt_name, step_name)): + # We set the total count of decorators so that we can use it for + # when calling the finalize function of CardComponentCollector + # We set the total @card per step via calling `_set_card_counts_per_step`. + other_card_decorators = [ + deco for deco in decorators if isinstance(deco, self.__class__) + ] + self._set_card_counts_per_step(step_name, len(other_card_decorators)) + self._register_event(evt_name) def task_pre_step( self, @@ -102,6 +162,56 @@ def task_pre_step( ubf_context, inputs, ): + card_type = self.attributes["type"] + card_class = get_card_class(card_type) + if card_class is not None: # Card type was not found + if card_class.ALLOW_USER_COMPONENTS: + self._is_editable = True + # We have a step counter to ensure that on calling the final card decorator's `task_pre_step` + # we call a `finalize` function in the `CardComponentCollector`. + # This can help ensure the behaviour of the `current.card` object is according to specification. + self._increment_step_counter() + self._user_set_card_id = self.attributes["id"] + if ( + self.attributes["id"] is not None + and re.match(CARD_ID_PATTERN, self.attributes["id"]) is None + ): + # There should be a warning issued to the user that `id` doesn't match regex pattern + # Since it is doesn't match pattern, we need to ensure that `id` is not accepted by `current` + # and warn users that they cannot use id for thier arguements. + wrn_msg = ( + "@card with id '%s' doesn't match REGEX pattern. " + "Adding custom components to cards will not be accessible via `current.card['%s']`. " + "Please create `id` of pattern %s. " + ) % (self.attributes["id"], self.attributes["id"], TYPE_CHECK_REGEX) + warning_message(wrn_msg, self._logger) + self._user_set_card_id = None + + # As we have multiple decorators, + # we need to ensure that `current.card` has `CardComponentCollector` instantiated only once. + if not self._is_event_registered("pre-step"): + self._register_event("pre-step") + current._update_env({"card": CardComponentCollector(self._logger)}) + + # this line happens because of decospecs parsing. + customize = False + if str(self.attributes["customize"]) == "True": + customize = True + + card_metadata = current.card._add_card( + self.attributes["type"], + self._user_set_card_id, + self._is_editable, + customize, + ) + self._card_uuid = card_metadata["uuid"] + + # This means that the we are calling `task_pre_step` on the last card decorator. + # We can now `finalize` method in the CardComponentCollector object. + # This will setup the `current.card` object for usage inside `@step` code. + if self.step_counter == self.total_decos_on_step[step_name]: + current.card._finalize() + self._task_datastore = task_datastore self._metadata = metadata @@ -110,8 +220,9 @@ def task_finished( ): if not is_task_ok: return + component_strings = current.card._serialize_components(self._card_uuid) runspec = "/".join([current.run_id, current.step_name, current.task_id]) - self._run_cards_subprocess(runspec) + self._run_cards_subprocess(runspec, component_strings) @staticmethod def _options(mapping): @@ -143,7 +254,12 @@ def _create_top_level_args(self): top_level_options.update(deco.get_top_level_options()) return list(self._options(top_level_options)) - def _run_cards_subprocess(self, runspec): + def _run_cards_subprocess(self, runspec, component_strings): + temp_file = None + if len(component_strings) > 0: + temp_file = tempfile.NamedTemporaryFile("w", suffix=".json") + json.dump(component_strings, temp_file) + temp_file.seek(0) executable = sys.executable cmd = [ executable, @@ -165,9 +281,16 @@ def _run_cards_subprocess(self, runspec): if self.attributes["timeout"] is not None: cmd += ["--timeout", str(self.attributes["timeout"])] - if self.attributes["save_errors"]: + if self._user_set_card_id is not None: + cmd += ["--id", str(self._user_set_card_id)] + + # Doing this because decospecs parse information as str, since some non-runtime decorators pass it as bool we parse bool to str + if str(self.attributes["save_errors"]) == "True": cmd += ["--render-error-card"] + if temp_file is not None: + cmd += ["--component-file", temp_file.name] + response, fail = self._run_command( cmd, os.environ, timeout=self.attributes["timeout"] ) diff --git a/metaflow/plugins/cards/card_modules/__init__.py b/metaflow/plugins/cards/card_modules/__init__.py index 2c9d517ac38..cd1e3105d60 100644 --- a/metaflow/plugins/cards/card_modules/__init__.py +++ b/metaflow/plugins/cards/card_modules/__init__.py @@ -1 +1,112 @@ +import os +import traceback from .card import MetaflowCard, MetaflowCardComponent +from metaflow.extension_support import get_modules, EXT_PKG, _ext_debug + + +def iter_namespace(ns_pkg): + # Specifying the second argument (prefix) to iter_modules makes the + # returned name an absolute name instead of a relative one. This allows + # import_module to work without having to do additional modification to + # the name. + import pkgutil + + return pkgutil.iter_modules(ns_pkg.__path__, ns_pkg.__name__ + ".") + + +def _get_external_card_packages(with_paths=False): + """ + Safely extract all exteranl card modules + Args: + with_paths (bool, optional): setting `with_paths=True` will result in a list of tuples: `[( mf_extensions_parent_path , relative_path_to_module, module)]`. setting false will return a list of modules Defaults to False. + + Returns: + `list` of `ModuleType` or `list` of `tuples` where each tuple if of the form (mf_extensions_parent_path , relative_path_to_module, ModuleType) + """ + import importlib + + available_card_modules = [] + for m in get_modules("plugins.cards"): + # Iterate submodules of metaflow_extensions.X.plugins.cards + # For example metaflow_extensions.X.plugins.cards.monitoring + card_packages = [] + for fndx, card_mod, ispkg_c in iter_namespace(m.module): + try: + if not ispkg_c: + continue + cm = importlib.import_module(card_mod) + _ext_debug("Importing card package %s" % card_mod) + if with_paths: + card_packages.append((fndx.path, cm)) + else: + card_packages.append(cm) + except Exception as e: + _ext_debug( + "External Card Module Import Exception \n\n %s \n\n %s" + % (str(e), traceback.format_exc()) + ) + if with_paths: + card_packages = [ + ( + os.path.abspath( + os.path.join(pth, "../../../../") + ), # parent path to metaflow_extensions + os.path.join( + EXT_PKG, + os.path.relpath(m.__path__[0], os.path.join(pth, "../../../")), + ), # construct relative path to parent. + m, + ) + for pth, m in card_packages + ] + available_card_modules.extend(card_packages) + return available_card_modules + + +def _load_external_cards(): + # Load external card packages + card_packages = _get_external_card_packages() + if not card_packages: + return [] + external_cards = {} + card_arr = [] + # Load cards from all external packages. + for package in card_packages: + try: + cards = package.CARDS + # Ensure that types match. + if not type(cards) == list: + continue + except AttributeError: + continue + else: + for c in cards: + if not isinstance(c, type) or not issubclass(c, MetaflowCard): + # every card should only be inheriting a MetaflowCard + continue + if not getattr(c, "type", None): + # todo Warn user of non existant `type` in MetaflowCard + continue + if c.type in external_cards: + # todo Warn user of duplicate card + continue + # external_cards[c.type] = c + card_arr.append(c) + return card_arr + + +def _get_external_card_package_paths(): + pkg_iter = _get_external_card_packages(with_paths=True) + if pkg_iter is None: + return None + for ( + mf_extension_parent_path, + relative_path_to_module, + _, + ) in pkg_iter: + module_pth = os.path.join(mf_extension_parent_path, relative_path_to_module) + arcname = relative_path_to_module + yield module_pth, arcname + + +MF_EXTERNAL_CARDS = _load_external_cards() diff --git a/metaflow/plugins/cards/card_modules/base.html b/metaflow/plugins/cards/card_modules/base.html index 8c6d8e0d50b..c61eb9e672a 100644 --- a/metaflow/plugins/cards/card_modules/base.html +++ b/metaflow/plugins/cards/card_modules/base.html @@ -16,9 +16,14 @@ -
+
- {id} - {id} + {/if} + {artifact.data} diff --git a/metaflow/plugins/cards/ui/src/components/artifacts.svelte b/metaflow/plugins/cards/ui/src/components/artifacts.svelte index bb974d0391e..72309d41225 100644 --- a/metaflow/plugins/cards/ui/src/components/artifacts.svelte +++ b/metaflow/plugins/cards/ui/src/components/artifacts.svelte @@ -7,27 +7,30 @@ const { data } = componentData; // we can't guarantee the data is sorted from the source, so we sort before render - const sortedData = Object.entries(data).sort((a, b) => { - if (a[0] > b[0]) { - return 1; - } else if (a[0] < b[0]) { - return -1; + const sortedData = data.sort((a, b) => { + // nulls first + if (a.name && b.name) { + if (a.name > b.name) { + return 1; + } else if (a.name < b.name) { + return -1; + } } return 0; }); -
+
- {#each sortedData as [k, v]} - + {#each sortedData as artifact} + {/each}