Source code for gigl.common.utils.kfp

from __future__ import annotations

import json
from pathlib import Path
from typing import Any, Optional

import numpy as np
import pandas as pd

from gigl.common import LocalUri
from gigl.common.logger import Logger

# Module-level logger instance.
logger = Logger()
class KfpOutputViewers:
    """Collect Kubeflow Pipelines output-viewer metadata and write it to disk.

    To enable component UI visualizations, a component must have an output
    path called ``mlpipeline_ui_metadata`` and export a JSON-serialized
    dictionary to that path that contains metadata of what visualizations to
    render.

    Example:
        component.yaml::

            outputs:
            - {name: mlpipeline_ui_metadata, type: UI metadata}
            ...
            command: [
                python, -m, some_script.py,
                --mlpipeline_ui_metadata_path, {outputPath: mlpipeline_ui_metadata},

        some_script.py::

            if __name__ == "__main__":
                parser = argparse.ArgumentParser()
                parser.add_argument(
                    "--mlpipeline_ui_metadata_path",
                    type=str,
                    help="The file path to output the visualization metadata to",
                )
                args = parser.parse_args()
                outputs = KfpOutputViewers()
                outputs.add_confusion_matrix(
                    confusion_matrix=np.array([[2, 0, 0], [0, 0, 1], [1, 0, 2]])
                )
                # write_to_output_viewer_path reads ``path.uri``, so the raw
                # string from argparse has to be wrapped in a LocalUri first.
                outputs.write_to_output_viewer_path(
                    LocalUri(args.mlpipeline_ui_metadata_path)
                )

    See https://www.kubeflow.org/docs/components/pipelines/sdk/output-viewer/
    """

    def __init__(self) -> None:
        """Start with an empty list of output-viewer entries."""
        self.__outputs_list: list[dict[str, Any]] = []

    def add_confusion_matrix(
        self, confusion_matrix: np.ndarray, vocab: Optional[list[str]] = None
    ) -> None:
        """Queue a confusion-matrix visualization.

        The matrix is flattened row by row into ``target,predicted,count`` CSV
        lines (no header, no index) and stored inline in the metadata entry.

        Args:
            confusion_matrix: Two-dimensional matrix of counts. The outer index
                is used as the target label index and the inner index as the
                predicted label index.
            vocab: Label for each index. When omitted, the stringified row
                indices ``"0", "1", ...`` are used.

        Raises:
            IndexError: If ``vocab`` has fewer entries than the matrix has rows
                or columns. The default vocab is sized from the number of rows,
                so a matrix with more columns than rows also triggers this.
        """
        if vocab is None:
            vocab = [str(i) for i in range(confusion_matrix.shape[0])]

        columns = ["target", "predicted", "count"]
        # One (target label, predicted label, count) record per matrix cell.
        rows = [
            (vocab[target_index], vocab[predicted_index], count)
            for target_index, target_row in enumerate(confusion_matrix)
            for predicted_index, count in enumerate(target_row)
        ]
        df_confusion_matrix = pd.DataFrame(rows, columns=columns)
        confusion_matrix_str = df_confusion_matrix.to_csv(
            columns=columns, header=False, index=False
        )

        self.__outputs_list.append(
            {
                "type": "confusion_matrix",
                "format": "csv",
                "schema": [
                    {"name": "target", "type": "CATEGORY"},
                    {"name": "predicted", "type": "CATEGORY"},
                    {"name": "count", "type": "NUMBER"},
                ],
                "source": confusion_matrix_str,
                "storage": "inline",
                # Convert vocab to string because for boolean values we want
                # "True|False" to match csv data.
                "labels": list(map(str, vocab)),
            }
        )

    def write_to_output_viewer_path(self, path: LocalUri) -> None:
        """Write every queued visualization to ``path`` as JSON.

        The file contains a single object of the form ``{"outputs": [...]}``.
        Missing parent directories are created first.

        Args:
            path: Destination of the metadata file; its ``uri`` attribute is
                used as the filesystem path.
        """
        metadata = {"outputs": self.__outputs_list}
        Path(path.uri).parent.mkdir(parents=True, exist_ok=True)
        with open(path.uri, "w", encoding="utf-8") as f:
            json.dump(metadata, f)