from __future__ import annotations

import json
from pathlib import Path
from typing import Any, Optional, Union

import numpy as np
import pandas as pd

from gigl.common import LocalUri
from gigl.common.logger import Logger
[docs]
class KfpOutputViewers:
    """
    Collects Kubeflow Pipelines output-viewer entries and writes them as UI metadata.

    To enable component ui visualizations, your component must have an output path called
    mlpipeline_ui_metadata and export a JSON-serialized dictionary to that path that contains
    metadata of what visualizations to render.

    Example:

    component.yaml
        >>> outputs:
        >>> - {name: mlpipeline_ui_metadata, type: UI metadata}
        >>> ...
        >>> command: [
        >>>     python, -m, some_script.py,
        >>>     --mlpipeline_ui_metadata_path, {outputPath: mlpipeline_ui_metadata},
        >>> ]

    some_script.py
        >>> if __name__ == "__main__":
        >>>     parser = argparse.ArgumentParser()
        >>>     parser.add_argument(
        ...         "--mlpipeline_ui_metadata_path",
        ...         type=str,
        ...         help="The file path to output the visualization metadata to",
        ...     )
        >>>     args = parser.parse_args()
        >>>     outputs = KfpOutputViewers()
        >>>     outputs.add_confusion_matrix(
        ...         confusion_matrix=array([[2, 0, 0], [0, 0, 1], [1, 0, 2]])
        ...     )
        >>>     outputs.write_to_output_viewer_path(args.mlpipeline_ui_metadata_path)

    https://www.kubeflow.org/docs/components/pipelines/sdk/output-viewer/
    """

    def __init__(self) -> None:
        """Start with an empty list of output-viewer entries."""
        self.__outputs_list: list[dict[str, Any]] = []

    def add_confusion_matrix(
        self, confusion_matrix: np.ndarray, vocab: Optional[list[str]] = None
    ) -> None:
        """
        Queue a confusion-matrix viewer entry built from ``confusion_matrix``.

        The matrix is flattened into inline CSV rows of ``target,predicted,count``,
        where the outer index of ``confusion_matrix`` is the target and the inner
        index is the prediction.

        Args:
            confusion_matrix: Array iterated row by row, then cell by cell.
            vocab: Label for each class index. Defaults to the stringified row
                indices ``"0" .. str(confusion_matrix.shape[0] - 1)``.

        Raises:
            IndexError: If ``vocab`` has fewer entries than the matrix has rows
                or columns.
        """
        if vocab is None:
            vocab = [str(i) for i in range(confusion_matrix.shape[0])]

        columns = ["target", "predicted", "count"]
        rows = [
            (vocab[target_index], vocab[predicted_index], count)
            for target_index, target_row in enumerate(confusion_matrix)
            for predicted_index, count in enumerate(target_row)
        ]
        confusion_matrix_str = pd.DataFrame(rows, columns=columns).to_csv(
            columns=columns, header=False, index=False
        )

        self.__outputs_list.append(
            {
                "type": "confusion_matrix",
                "format": "csv",
                "schema": [
                    {"name": "target", "type": "CATEGORY"},
                    {"name": "predicted", "type": "CATEGORY"},
                    {"name": "count", "type": "NUMBER"},
                ],
                "source": confusion_matrix_str,
                "storage": "inline",
                # Convert vocab to string because for boolean values we want "True|False" to match csv data.
                "labels": list(map(str, vocab)),
            }
        )

    def write_to_output_viewer_path(self, path: Union[LocalUri, str]) -> None:
        """
        Write every queued viewer entry to ``path`` as ``{"outputs": [...]}`` JSON.

        Missing parent directories are created first.

        Args:
            path: Destination file. Either an object exposing the location through
                a ``uri`` attribute (e.g. ``LocalUri``) or a plain ``str`` /
                ``os.PathLike`` path, such as the value argparse hands to the script.
        """
        metadata = {"outputs": self.__outputs_list}
        # Duck-type on ``uri`` so a raw string path (as in the class example) works
        # alongside LocalUri instead of failing with AttributeError.
        target = Path(getattr(path, "uri", path))
        target.parent.mkdir(parents=True, exist_ok=True)
        with target.open("w", encoding="utf-8") as f:
            json.dump(metadata, f)