From 4a4d44a86c0c309b9d16b2bc68ed84f94d1f3abf Mon Sep 17 00:00:00 2001 From: Christine Draper Date: Tue, 14 May 2024 15:54:39 -0400 Subject: [PATCH 1/2] Fix some formatting issues with tables in text diagnoser report --- .../customized_diagnoser.ipynb | 163 ++++++++------- examples/example_notebooks/diagnoser.ipynb | 194 +++++++++--------- .../diagnoser/models/diagnosis_report.py | 9 +- 3 files changed, 180 insertions(+), 186 deletions(-) diff --git a/examples/example_notebooks/customized_diagnoser.ipynb b/examples/example_notebooks/customized_diagnoser.ipynb index 8268621..4f039c5 100644 --- a/examples/example_notebooks/customized_diagnoser.ipynb +++ b/examples/example_notebooks/customized_diagnoser.ipynb @@ -17,14 +17,14 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 20, "metadata": { "pycharm": { "name": "#%%\n" }, "ExecuteTime": { - "end_time": "2024-05-03T20:44:40.525488Z", - "start_time": "2024-05-03T20:44:40.522741Z" + "end_time": "2024-05-14T19:56:20.461774Z", + "start_time": "2024-05-14T19:56:20.459107Z" } }, "outputs": [], @@ -47,12 +47,12 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 21, "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-05-03T20:44:48.257815Z", - "start_time": "2024-05-03T20:44:40.549686Z" + "end_time": "2024-05-14T19:56:26.198940Z", + "start_time": "2024-05-14T19:56:20.491315Z" } }, "outputs": [], @@ -84,12 +84,12 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 22, "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-05-03T20:44:48.555773Z", - "start_time": "2024-05-03T20:44:48.259295Z" + "end_time": "2024-05-14T19:56:26.209383Z", + "start_time": "2024-05-14T19:56:26.201460Z" } }, "outputs": [], @@ -110,19 +110,19 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 23, "metadata": { "ExecuteTime": { - "end_time": "2024-05-03T20:44:48.967016Z", - "start_time": "2024-05-03T20:44:48.556657Z" + "end_time": "2024-05-14T19:56:26.626871Z", + "start_time": "2024-05-14T19:56:26.210469Z" } }, "outputs": [ { "data": { - "text/plain": "(TimeRange(start=datetime.datetime(2021, 5, 20, 0, 0, tzinfo=datetime.timezone.utc), end=datetime.datetime(2024, 5, 2, 21, 0, tzinfo=datetime.timezone.utc)),\n ,\n '2024-04-02T00:00:00.000Z/2024-05-02T00:00:00.000Z')" + "text/plain": "(TimeRange(start=datetime.datetime(2021, 5, 20, 0, 0, tzinfo=datetime.timezone.utc), end=datetime.datetime(2024, 5, 13, 21, 0, tzinfo=datetime.timezone.utc)),\n ,\n '2024-04-13T00:00:00.000Z/2024-05-13T00:00:00.000Z')" }, - "execution_count": 4, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -143,21 +143,21 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 24, "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-05-03T20:44:49.668954Z", - "start_time": "2024-05-03T20:44:48.968682Z" + "end_time": "2024-05-14T19:56:27.372526Z", + "start_time": "2024-05-14T19:56:26.629839Z" } }, "outputs": [ { "data": { - "text/plain": " monitor_id \\\n0 frequent-items-drift-monitor-x2hr9z \n1 discrete-distribution-22ef37c9-monitor \n2 smoggy-chartreuse-owl-3387 \n3 frequent-items-drift-monitor-bx6m80 \n4 frequent-items-drift-monitor-mat0jo \n5 frequent-items-drift-monitor-01rbfl \n6 frequent-items-drift-monitor-0foigt \n7 frequent-items-drift-monitor-3c0hc2 \n8 frequent-items-drift-monitor-9gmtix \n9 elated-palegreen-jaguar-6432 \n10 inferred-data-type-fec5a735-monitor \n11 unique-ratio-b7b84aee-monitor \n12 missing-values-ratio-35881327-monitor \n13 numerical-drift-monitor-6oxi83 \n14 numerical-drift-monitor-8yugth \n15 continuous-distribution-956a280c-monitor \n16 dull-floralwhite-raven-5521 \n\n analyzer_id metric column_count \\\n0 frequent-items-drift-analyzer-x2hr9z frequent_items 3 \n1 discrete-distribution-22ef37c9 frequent_items 3 \n2 smoggy-chartreuse-owl-3387-analyzer frequent_items 3 \n3 frequent-items-drift-analyzer-bx6m80 frequent_items 3 \n4 frequent-items-drift-analyzer-mat0jo frequent_items 3 \n5 frequent-items-drift-analyzer-01rbfl frequent_items 3 \n6 frequent-items-drift-analyzer-0foigt frequent_items 3 \n7 frequent-items-drift-analyzer-3c0hc2 frequent_items 3 \n8 frequent-items-drift-analyzer-9gmtix frequent_items 3 \n9 elated-palegreen-jaguar-6432-analyzer histogram 9 \n10 inferred-data-type-fec5a735 inferred_data_type 1 \n11 unique-ratio-b7b84aee unique_est_ratio 69 \n12 missing-values-ratio-35881327 count_null_ratio 21 \n13 numerical-drift-analyzer-6oxi83 histogram 1 \n14 numerical-drift-analyzer-8yugth histogram 1 \n15 continuous-distribution-956a280c histogram 1 \n16 dull-floralwhite-raven-5521-analyzer count 2 \n\n segment_count anomaly_count max_anomaly_per_column \\\n0 1 34 30 \n1 1 34 30 \n2 1 34 30 \n3 1 34 30 \n4 1 34 30 \n5 1 34 30 \n6 1 34 30 \n7 1 34 30 \n8 1 34 30 \n9 1 75 19 \n10 1 14 14 \n11 1 104 4 \n12 1 27 3 \n13 1 2 2 \n14 1 2 2 \n15 1 2 2 \n16 1 3 2 \n\n min_anomaly_per_column avg_anomaly_per_column action_count \\\n0 1 11 1 \n1 1 11 0 \n2 1 11 0 \n3 1 11 0 \n4 1 11 2 \n5 1 11 1 \n6 1 11 0 \n7 1 11 1 \n8 1 11 1 \n9 2 8 0 \n10 14 14 2 \n11 1 1 0 \n12 1 1 0 \n13 2 2 0 \n14 2 2 0 \n15 2 2 0 \n16 1 1 0 \n\n action_targets \n0 [email] \n1 [] \n2 [] \n3 [] \n4 [email, slack] \n5 [email] \n6 [] \n7 [email] \n8 [email] \n9 [] \n10 [email, slack] \n11 [] \n12 [] \n13 [] \n14 [] \n15 [] \n16 [] ", - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
monitor_idanalyzer_idmetriccolumn_countsegment_countanomaly_countmax_anomaly_per_columnmin_anomaly_per_columnavg_anomaly_per_columnaction_countaction_targets
0frequent-items-drift-monitor-x2hr9zfrequent-items-drift-analyzer-x2hr9zfrequent_items3134301111[email]
1discrete-distribution-22ef37c9-monitordiscrete-distribution-22ef37c9frequent_items3134301110[]
2smoggy-chartreuse-owl-3387smoggy-chartreuse-owl-3387-analyzerfrequent_items3134301110[]
3frequent-items-drift-monitor-bx6m80frequent-items-drift-analyzer-bx6m80frequent_items3134301110[]
4frequent-items-drift-monitor-mat0jofrequent-items-drift-analyzer-mat0jofrequent_items3134301112[email, slack]
5frequent-items-drift-monitor-01rbflfrequent-items-drift-analyzer-01rbflfrequent_items3134301111[email]
6frequent-items-drift-monitor-0foigtfrequent-items-drift-analyzer-0foigtfrequent_items3134301110[]
7frequent-items-drift-monitor-3c0hc2frequent-items-drift-analyzer-3c0hc2frequent_items3134301111[email]
8frequent-items-drift-monitor-9gmtixfrequent-items-drift-analyzer-9gmtixfrequent_items3134301111[email]
9elated-palegreen-jaguar-6432elated-palegreen-jaguar-6432-analyzerhistogram917519280[]
10inferred-data-type-fec5a735-monitorinferred-data-type-fec5a735inferred_data_type11141414142[email, slack]
11unique-ratio-b7b84aee-monitorunique-ratio-b7b84aeeunique_est_ratio6911044110[]
12missing-values-ratio-35881327-monitormissing-values-ratio-35881327count_null_ratio211273110[]
13numerical-drift-monitor-6oxi83numerical-drift-analyzer-6oxi83histogram1122220[]
14numerical-drift-monitor-8yugthnumerical-drift-analyzer-8yugthhistogram1122220[]
15continuous-distribution-956a280c-monitorcontinuous-distribution-956a280chistogram1122220[]
16dull-floralwhite-raven-5521dull-floralwhite-raven-5521-analyzercount2132110[]
\n
" + "text/plain": " monitor_id \\\n0 frequent-items-drift-monitor-bx6m80 \n1 discrete-distribution-22ef37c9-monitor \n2 smoggy-chartreuse-owl-3387 \n3 frequent-items-drift-monitor-mat0jo \n4 frequent-items-drift-monitor-01rbfl \n5 frequent-items-drift-monitor-0foigt \n6 frequent-items-drift-monitor-3c0hc2 \n7 frequent-items-drift-monitor-9gmtix \n8 elated-palegreen-jaguar-6432 \n9 frequent-items-drift-monitor-x2hr9z \n10 inferred-data-type-fec5a735-monitor \n11 continuous-distribution-956a280c-monitor \n12 numerical-drift-monitor-6oxi83 \n13 numerical-drift-monitor-8yugth \n14 missing-values-ratio-35881327-monitor \n15 unique-ratio-b7b84aee-monitor \n16 dull-floralwhite-raven-5521 \n\n analyzer_id metric column_count \\\n0 frequent-items-drift-analyzer-bx6m80 frequent_items 3 \n1 discrete-distribution-22ef37c9 frequent_items 3 \n2 smoggy-chartreuse-owl-3387-analyzer frequent_items 3 \n3 frequent-items-drift-analyzer-mat0jo frequent_items 3 \n4 frequent-items-drift-analyzer-01rbfl frequent_items 3 \n5 frequent-items-drift-analyzer-0foigt frequent_items 3 \n6 frequent-items-drift-analyzer-3c0hc2 frequent_items 3 \n7 frequent-items-drift-analyzer-9gmtix frequent_items 3 \n8 elated-palegreen-jaguar-6432-analyzer histogram 8 \n9 frequent-items-drift-analyzer-x2hr9z frequent_items 3 \n10 inferred-data-type-fec5a735 inferred_data_type 1 \n11 continuous-distribution-956a280c histogram 1 \n12 numerical-drift-analyzer-6oxi83 histogram 1 \n13 numerical-drift-analyzer-8yugth histogram 1 \n14 missing-values-ratio-35881327 count_null_ratio 21 \n15 unique-ratio-b7b84aee unique_est_ratio 68 \n16 dull-floralwhite-raven-5521-analyzer count 2 \n\n segment_count anomaly_count max_anomaly_per_column \\\n0 1 38 30 \n1 1 38 30 \n2 1 38 30 \n3 1 38 30 \n4 1 38 30 \n5 1 38 30 \n6 1 38 30 \n7 1 38 30 \n8 1 81 25 \n9 1 22 19 \n10 1 18 18 \n11 1 8 8 \n12 1 8 8 \n13 1 8 8 \n14 1 30 4 \n15 1 103 3 \n16 1 2 1 \n\n min_anomaly_per_column avg_anomaly_per_column action_count \\\n0 2 12 0 \n1 2 12 0 \n2 2 12 0 \n3 2 12 2 \n4 2 12 1 \n5 2 12 0 \n6 2 12 1 \n7 2 12 1 \n8 2 10 0 \n9 1 7 1 \n10 18 18 2 \n11 8 8 0 \n12 8 8 0 \n13 8 8 0 \n14 1 1 0 \n15 1 1 0 \n16 1 1 0 \n\n action_targets \n0 [] \n1 [] \n2 [] \n3 [email, slack] \n4 [email] \n5 [] \n6 [email] \n7 [email] \n8 [] \n9 [email] \n10 [email, slack] \n11 [] \n12 [] \n13 [] \n14 [] \n15 [] \n16 [] ", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
monitor_idanalyzer_idmetriccolumn_countsegment_countanomaly_countmax_anomaly_per_columnmin_anomaly_per_columnavg_anomaly_per_columnaction_countaction_targets
0frequent-items-drift-monitor-bx6m80frequent-items-drift-analyzer-bx6m80frequent_items3138302120[]
1discrete-distribution-22ef37c9-monitordiscrete-distribution-22ef37c9frequent_items3138302120[]
2smoggy-chartreuse-owl-3387smoggy-chartreuse-owl-3387-analyzerfrequent_items3138302120[]
3frequent-items-drift-monitor-mat0jofrequent-items-drift-analyzer-mat0jofrequent_items3138302122[email, slack]
4frequent-items-drift-monitor-01rbflfrequent-items-drift-analyzer-01rbflfrequent_items3138302121[email]
5frequent-items-drift-monitor-0foigtfrequent-items-drift-analyzer-0foigtfrequent_items3138302120[]
6frequent-items-drift-monitor-3c0hc2frequent-items-drift-analyzer-3c0hc2frequent_items3138302121[email]
7frequent-items-drift-monitor-9gmtixfrequent-items-drift-analyzer-9gmtixfrequent_items3138302121[email]
8elated-palegreen-jaguar-6432elated-palegreen-jaguar-6432-analyzerhistogram8181252100[]
9frequent-items-drift-monitor-x2hr9zfrequent-items-drift-analyzer-x2hr9zfrequent_items312219171[email]
10inferred-data-type-fec5a735-monitorinferred-data-type-fec5a735inferred_data_type11181818182[email, slack]
11continuous-distribution-956a280c-monitorcontinuous-distribution-956a280chistogram1188880[]
12numerical-drift-monitor-6oxi83numerical-drift-analyzer-6oxi83histogram1188880[]
13numerical-drift-monitor-8yugthnumerical-drift-analyzer-8yugthhistogram1188880[]
14missing-values-ratio-35881327-monitormissing-values-ratio-35881327count_null_ratio211304110[]
15unique-ratio-b7b84aee-monitorunique-ratio-b7b84aeeunique_est_ratio6811033110[]
16dull-floralwhite-raven-5521dull-floralwhite-raven-5521-analyzercount2121110[]
\n
" }, - "execution_count": 5, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } @@ -181,12 +181,12 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 25, "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-05-03T20:44:49.675298Z", - "start_time": "2024-05-03T20:44:49.669786Z" + "end_time": "2024-05-14T19:56:27.381587Z", + "start_time": "2024-05-14T19:56:27.374413Z" } }, "outputs": [ @@ -195,7 +195,7 @@ "text/plain": " monitor_id analyzer_id \\\n0 inferred-data-type-fec5a735-monitor inferred-data-type-fec5a735 \n1 missing-values-ratio-35881327-monitor missing-values-ratio-35881327 \n2 unique-ratio-b7b84aee-monitor unique-ratio-b7b84aee \n\n metric failed_count max_failed_per_column \\\n0 inferred_data_type 3 3 \n1 count_null_ratio 1 1 \n2 unique_est_ratio 1 1 \n\n min_failed_per_column avg_failed_per_column action_count action_targets \n0 3 3 2 [email, slack] \n1 1 1 0 [] \n2 1 1 0 [] ", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
monitor_idanalyzer_idmetricfailed_countmax_failed_per_columnmin_failed_per_columnavg_failed_per_columnaction_countaction_targets
0inferred-data-type-fec5a735-monitorinferred-data-type-fec5a735inferred_data_type33332[email, slack]
1missing-values-ratio-35881327-monitormissing-values-ratio-35881327count_null_ratio11110[]
2unique-ratio-b7b84aee-monitorunique-ratio-b7b84aeeunique_est_ratio11110[]
\n
" }, - "execution_count": 6, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } @@ -214,19 +214,19 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 26, "metadata": { "ExecuteTime": { - "end_time": "2024-05-03T20:44:49.678419Z", - "start_time": "2024-05-03T20:44:49.676141Z" + "end_time": "2024-05-14T19:56:27.385574Z", + "start_time": "2024-05-14T19:56:27.382543Z" } }, "outputs": [ { "data": { - "text/plain": "'frequent-items-drift-monitor-x2hr9z'" + "text/plain": "'frequent-items-drift-monitor-bx6m80'" }, - "execution_count": 7, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } @@ -246,20 +246,20 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 27, "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-05-03T20:44:49.681587Z", - "start_time": "2024-05-03T20:44:49.679132Z" + "end_time": "2024-05-14T19:56:27.389072Z", + "start_time": "2024-05-14T19:56:27.386612Z" } }, "outputs": [ { "data": { - "text/plain": "Monitor(metadata=Metadata(version=1, schemaVersion=1, updatedTimestamp=1705536890090, author='user_809f777d_3741_4991_8ced_42f09b883ac7', description=None), id='frequent-items-drift-monitor-x2hr9z', displayName=None, tags=None, analyzerIds=['frequent-items-drift-analyzer-x2hr9z'], schedule=ImmediateSchedule(type='immediate'), disabled=False, severity=3, mode=DigestMode(type='DIGEST', filter=None, creationTimeOffset=None, datasetTimestampOffset=None, groupBy=None), actions=[GlobalAction(type='global', target='email')])" + "text/plain": "Monitor(metadata=Metadata(version=1, schemaVersion=1, updatedTimestamp=1711135588156, author='user_809f777d_3741_4991_8ced_42f09b883ac7', description=None), id='frequent-items-drift-monitor-bx6m80', displayName=None, tags=None, analyzerIds=['frequent-items-drift-analyzer-bx6m80'], schedule=ImmediateSchedule(type='immediate'), disabled=False, severity=3, mode=DigestMode(type='DIGEST', filter=None, creationTimeOffset=None, datasetTimestampOffset=None, groupBy=None), actions=[])" }, - "execution_count": 8, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } @@ -279,20 +279,20 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 28, "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-05-03T20:44:49.878456Z", - "start_time": "2024-05-03T20:44:49.682343Z" + "end_time": "2024-05-14T19:56:27.584900Z", + "start_time": "2024-05-14T19:56:27.389808Z" } }, "outputs": [ { "data": { - "text/plain": "Analyzer(metadata=Metadata(version=6, schemaVersion=1, updatedTimestamp=1714769079201, author='user_c9292ec40407f7b580f0a2c90745ebfba2b9e6ea81c848ef944d31e48a45f98', description=None), id='frequent-items-drift-analyzer-x2hr9z', displayName=None, tags=None, schedule=FixedCadenceSchedule(type='fixed', cadence=, exclusionRanges=None), disabled=None, disableTargetRollup=None, targetMatrix=ColumnMatrix(segments=[Segment(tags=[])], type=, include=[], exclude=['issue_d', , 'desc', 'url'], profileId=None), dataReadinessDuration=None, batchCoolDownPeriod=None, backfillGracePeriodDuration=None, config=DriftConfig(schemaVersion=None, params=None, metric=, type=, algorithm='hellinger', threshold=0.7, minBatchSize=1, baseline=TrailingWindowBaseline(datasetId=None, inheritSegment=None, type=, size=7, offset=None, exclusionRanges=None)))" + "text/plain": "Analyzer(metadata=Metadata(version=3, schemaVersion=1, updatedTimestamp=1715716552766, author='user_c9292ec40407f7b580f0a2c90745ebfba2b9e6ea81c848ef944d31e48a45f98', description=None), id='frequent-items-drift-analyzer-bx6m80', displayName=None, tags=None, schedule=FixedCadenceSchedule(type='fixed', cadence=, exclusionRanges=None), disabled=None, disableTargetRollup=None, targetMatrix=ColumnMatrix(segments=[Segment(tags=[])], type=, include=[], exclude=['url', , 'desc', 'issue_d'], profileId=None), dataReadinessDuration=None, batchCoolDownPeriod=None, backfillGracePeriodDuration=None, config=DriftConfig(schemaVersion=None, params=None, metric=, type=, algorithm='hellinger', threshold=0.7, minBatchSize=1, baseline=TrailingWindowBaseline(datasetId=None, inheritSegment=None, type=, size=7, offset=None, exclusionRanges=None)))" }, - "execution_count": 9, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" } @@ -312,21 +312,21 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 29, "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-05-03T20:44:50.042989Z", - "start_time": "2024-05-03T20:44:49.879144Z" + "end_time": "2024-05-14T19:56:27.805923Z", + "start_time": "2024-05-14T19:56:27.585634Z" } }, "outputs": [ { "data": { - "text/plain": " segment total_anomalies batch_count\n0 overall 34 30", - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
segmenttotal_anomaliesbatch_count
0overall3430
\n
" + "text/plain": " segment total_anomalies batch_count\n0 overall 38 30", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
segmenttotal_anomaliesbatch_count
0overall3830
\n
" }, - "execution_count": 10, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" } @@ -351,11 +351,11 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 30, "metadata": { "ExecuteTime": { - "end_time": "2024-05-03T20:44:50.047697Z", - "start_time": "2024-05-03T20:44:50.045178Z" + "end_time": "2024-05-14T19:56:27.812129Z", + "start_time": "2024-05-14T19:56:27.809188Z" } }, "outputs": [ @@ -363,7 +363,7 @@ "data": { "text/plain": "'overall'" }, - "execution_count": 11, + "execution_count": 30, "metadata": {}, "output_type": "execute_result" } @@ -383,20 +383,20 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 31, "metadata": { "ExecuteTime": { - "end_time": "2024-05-03T20:44:50.203811Z", - "start_time": "2024-05-03T20:44:50.048446Z" + "end_time": "2024-05-14T19:56:27.963449Z", + "start_time": "2024-05-14T19:56:27.813404Z" } }, "outputs": [ { "data": { - "text/plain": " column total_anomalies\n0 issue_d 30\n1 url 3\n2 desc 1\n3 disbursement_method 0\n4 earliest_cr_line 0\n5 emp_length 0\n6 emp_title 0\n7 grade 0\n8 hardship_flag 0\n9 home_ownership 0\n10 initial_list_status 0\n11 last_credit_pull_d 0\n12 last_pymnt_d 0\n13 loan_status 0\n14 next_pymnt_d 0\n15 purpose 0\n16 pymnt_plan 0\n17 sub_grade 0\n18 term 0\n19 title 0\n20 verification_status 0\n21 verification_status_joint 0\n22 addr_state 0\n23 zip_code 0\n24 application_type 0\n25 debt_settlement_flag 0", - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
columntotal_anomalies
0issue_d30
1url3
2desc1
3disbursement_method0
4earliest_cr_line0
5emp_length0
6emp_title0
7grade0
8hardship_flag0
9home_ownership0
10initial_list_status0
11last_credit_pull_d0
12last_pymnt_d0
13loan_status0
14next_pymnt_d0
15purpose0
16pymnt_plan0
17sub_grade0
18term0
19title0
20verification_status0
21verification_status_joint0
22addr_state0
23zip_code0
24application_type0
25debt_settlement_flag0
\n
" + "text/plain": " column total_anomalies\n0 issue_d 30\n1 url 6\n2 desc 2\n3 disbursement_method 0\n4 earliest_cr_line 0\n5 emp_length 0\n6 emp_title 0\n7 grade 0\n8 hardship_flag 0\n9 home_ownership 0\n10 initial_list_status 0\n11 last_credit_pull_d 0\n12 last_pymnt_d 0\n13 loan_status 0\n14 next_pymnt_d 0\n15 purpose 0\n16 pymnt_plan 0\n17 sub_grade 0\n18 term 0\n19 title 0\n20 verification_status 0\n21 verification_status_joint 0\n22 addr_state 0\n23 zip_code 0\n24 application_type 0\n25 debt_settlement_flag 0", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
columntotal_anomalies
0issue_d30
1url6
2desc2
3disbursement_method0
4earliest_cr_line0
5emp_length0
6emp_title0
7grade0
8hardship_flag0
9home_ownership0
10initial_list_status0
11last_credit_pull_d0
12last_pymnt_d0
13loan_status0
14next_pymnt_d0
15purpose0
16pymnt_plan0
17sub_grade0
18term0
19title0
20verification_status0
21verification_status_joint0
22addr_state0
23zip_code0
24application_type0
25debt_settlement_flag0
\n
" }, - "execution_count": 12, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } @@ -416,11 +416,11 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 32, "metadata": { "ExecuteTime": { - "end_time": "2024-05-03T20:44:50.207865Z", - "start_time": "2024-05-03T20:44:50.204706Z" + "end_time": "2024-05-14T19:56:27.966888Z", + "start_time": "2024-05-14T19:56:27.964239Z" } }, "outputs": [ @@ -428,7 +428,7 @@ "data": { "text/plain": "['issue_d',\n 'url',\n 'desc',\n 'disbursement_method',\n 'earliest_cr_line',\n 'emp_length',\n 'emp_title',\n 'grade',\n 'hardship_flag',\n 'home_ownership',\n 'initial_list_status',\n 'last_credit_pull_d',\n 'last_pymnt_d',\n 'loan_status',\n 'next_pymnt_d',\n 'purpose',\n 'pymnt_plan',\n 'sub_grade',\n 'term',\n 'title',\n 'verification_status',\n 'verification_status_joint',\n 'addr_state',\n 'zip_code',\n 'application_type',\n 'debt_settlement_flag']" }, - "execution_count": 13, + "execution_count": 32, "metadata": {}, "output_type": "execute_result" } @@ -447,11 +447,11 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 33, "metadata": { "ExecuteTime": { - "end_time": "2024-05-03T20:44:54.750266Z", - "start_time": "2024-05-03T20:44:50.208637Z" + "end_time": "2024-05-14T19:56:32.346779Z", + "start_time": "2024-05-14T19:56:27.967796Z" } }, "outputs": [], @@ -463,12 +463,12 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 34, "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-05-03T20:44:54.758004Z", - "start_time": "2024-05-03T20:44:54.751369Z" + "end_time": "2024-05-14T19:56:32.355851Z", + "start_time": "2024-05-14T19:56:32.347857Z" } }, "outputs": [ @@ -476,27 +476,26 @@ "name": "stdout", "output_type": "stream", "text": [ - "Diagnosis is for monitor \"frequent-items-drift-monitor-x2hr9z\" [frequent-items-drift-monitor-x2hr9z] in model-0 org-0, over interval 2024-04-02T00:00:00.000Z/2024-05-02T00:00:00.000Z.\n", - "Monitor has 1 notification actions ['email'].\n", + "Diagnosis is for monitor \"frequent-items-drift-monitor-bx6m80\" [frequent-items-drift-monitor-bx6m80] in model-0 org-0, over interval 2024-04-13T00:00:00.000Z/2024-05-13T00:00:00.000Z.\n", "\n", "Analyzer is drift configuration for frequent_items metric with TrailingWindow baseline.\n", - "Analyzer \"frequent-items-drift-analyzer-x2hr9z\" targets 27 columns and ran on 26 columns in the diagnosed segment.\n", + "Analyzer \"frequent-items-drift-analyzer-bx6m80\" targets 27 columns and ran on 26 columns in the diagnosed segment.\n", "\n", "\n", "Diagnostic segment is \"overall\".\n", "Diagnostic interval contains 30 batches.\n", "\n", - "Diagnostic interval rollup contains 1674392 rows for the diagnosed columns.\n", + "Diagnostic interval rollup contains 1517489 rows for the diagnosed columns.\n", "\n", "Analysis results summary:\n", "Found non-failed results for 26 columns and 30 batches.\n", - "Found 34 anomalies in 3 columns, with up to 100.0% (30) batches having anomalies per column and 36.7% (11.0) on average.\n", + "Found 38 anomalies in 3 columns, with up to 100.0% (30) batches having anomalies per column and 40.0% (12.0) on average.\n", "Columns with anomalies are:\n", - "| | 0 |\n", - "|---:|:----------------|\n", - "| 0 | ('issue_d', 30) |\n", - "| 1 | ('url', 3) |\n", - "| 2 | ('desc', 1) |\n", + "| | column | count |\n", + "|---:|:---------|--------:|\n", + "| 0 | issue_d | 30 |\n", + "| 1 | url | 6 |\n", + "| 2 | desc | 2 |\n", "\n", "No failures were detected.\n", "\n", @@ -508,12 +507,12 @@ "\t* Condition small_nonnull_batches (less than 500 non-null records in 50% or more of the batches) for 1 columns: ['desc']\n", "\n", "Anomalies for columns with these conditions:\n", - "| | 0 |\n", - "|:--------|----:|\n", - "| issue_d | 30 |\n", - "| url | 3 |\n", - "| desc | 1 |\n", - "Accounting for 34 anomalies out of 34\n" + "| | column | count |\n", + "|---:|:---------|--------:|\n", + "| 0 | issue_d | 30 |\n", + "| 1 | url | 6 |\n", + "| 2 | desc | 2 |\n", + "Accounting for 38 anomalies out of 38\n" ] } ], @@ -538,7 +537,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.16" + "version": "3.11.9" } }, "nbformat": 4, diff --git a/examples/example_notebooks/diagnoser.ipynb b/examples/example_notebooks/diagnoser.ipynb index 7118071..a1fdfcd 100644 --- a/examples/example_notebooks/diagnoser.ipynb +++ b/examples/example_notebooks/diagnoser.ipynb @@ -18,14 +18,14 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": { "pycharm": { "name": "#%%\n" }, "ExecuteTime": { - "end_time": "2024-05-03T20:44:23.177189Z", - "start_time": "2024-05-03T20:44:23.175308Z" + "end_time": "2024-05-14T19:55:38.467963Z", + "start_time": "2024-05-14T19:55:38.465132Z" } }, "outputs": [], @@ -48,11 +48,11 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": { "ExecuteTime": { - "end_time": "2024-05-03T20:44:30.824990Z", - "start_time": "2024-05-03T20:44:23.209328Z" + "end_time": "2024-05-14T19:55:44.329405Z", + "start_time": "2024-05-14T19:55:38.475473Z" } }, "outputs": [], @@ -82,11 +82,11 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "metadata": { "ExecuteTime": { - "end_time": "2024-05-03T20:44:31.391534Z", - "start_time": "2024-05-03T20:44:30.827501Z" + "end_time": "2024-05-14T19:55:44.606466Z", + "start_time": "2024-05-14T19:55:44.332079Z" } }, "outputs": [], @@ -106,19 +106,19 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "metadata": { "ExecuteTime": { - "end_time": "2024-05-03T20:44:38.796330Z", - "start_time": "2024-05-03T20:44:31.392151Z" + "end_time": "2024-05-14T19:55:52.358526Z", + "start_time": "2024-05-14T19:55:44.609704Z" } }, "outputs": [ { "data": { - "text/plain": "MonitorDiagnosisReport(orgId='org-0', datasetId='model-0', analyzerId='frequent-items-drift-analyzer-x2hr9z', interval='2024-04-02T00:00:00.000Z/2024-05-02T00:00:00.000Z', expectedBatchCount=0, diagnosticData=DiagnosticDataSummary(diagnosticSegment=Segment(tags=[]), diagnosticProfile=ProfileSummary(minRowName='desc', minRowCount=1674392, maxRowName='desc', maxRowCount=1674392), diagnosticBatches=BatchesSummary(minBatchName='desc', minBatchCount=30, maxBatchName='desc', maxBatchCount=30), analysisResults=AnalysisResultsSummary(results=ResultRecord(diagnosedColumnCount=26, batchCount=30), failures=FailureRecord(totalFailuresCount=0, maxFailuresCount=0, meanFailuresCount=0, byColumnCount=[], byTypeCount=[]), anomalies=AnomalyRecord(totalAnomalyCount=34, maxAnomalyCount=30, meanAnomalyCount=11, batchCount=30, byColumnCount=[NamedCount(name='issue_d', count=30), NamedCount(name='url', count=3), NamedCount(name='desc', count=1)], byColumnBatchCount=[NamedCount(name='addr_state', count=30), NamedCount(name='application_type', count=30), NamedCount(name='debt_settlement_flag', count=30), NamedCount(name='desc', count=2), NamedCount(name='disbursement_method', count=30), NamedCount(name='earliest_cr_line', count=30), NamedCount(name='emp_length', count=30), NamedCount(name='emp_title', count=30), NamedCount(name='grade', count=30), NamedCount(name='hardship_flag', count=30), NamedCount(name='home_ownership', count=30), NamedCount(name='initial_list_status', count=30), NamedCount(name='issue_d', count=30), NamedCount(name='last_credit_pull_d', count=30), NamedCount(name='last_pymnt_d', count=30), NamedCount(name='loan_status', count=30), NamedCount(name='next_pymnt_d', count=30), NamedCount(name='purpose', count=30), NamedCount(name='pymnt_plan', count=30), NamedCount(name='sub_grade', count=30), NamedCount(name='term', count=30), NamedCount(name='title', count=30), NamedCount(name='url', count=30), NamedCount(name='verification_status', count=30), NamedCount(name='verification_status_joint', count=30), NamedCount(name='zip_code', count=30)])), targetedColumnCount=27), qualityIssues=[QualityIssueRecord(name='analyzer_changed', description='Analyzer changed within the diagnostic interval', detectors=['stale_analysis', 'changing_discrete', 'low_drift_threshold', 'missing_baseline_batches', 'small_nonnull_batches'])], conditions=[ConditionRecord(columns=['desc', 'issue_d', 'url'], info=None, summary='many values are unique across batches', name='changing_discrete'), ConditionRecord(columns=['desc'], info=None, summary='less than 500 non-null records in 50% or more of the batches', name='small_nonnull_batches')], monitor=Monitor(metadata=Metadata(version=1, schemaVersion=1, updatedTimestamp=1705536890090, author='user_809f777d_3741_4991_8ced_42f09b883ac7', description=None), id='frequent-items-drift-monitor-x2hr9z', displayName=None, tags=None, analyzerIds=['frequent-items-drift-analyzer-x2hr9z'], schedule=ImmediateSchedule(type='immediate'), disabled=False, severity=3, mode=DigestMode(type='DIGEST', filter=None, creationTimeOffset=None, datasetTimestampOffset=None, groupBy=None), actions=[GlobalAction(type='global', target='email')]), analyzer=Analyzer(metadata=Metadata(version=5, schemaVersion=1, updatedTimestamp=1714764383143, author='user_c9292ec40407f7b580f0a2c90745ebfba2b9e6ea81c848ef944d31e48a45f98', description=None), id='frequent-items-drift-analyzer-x2hr9z', displayName=None, tags=None, schedule=FixedCadenceSchedule(type='fixed', cadence=, exclusionRanges=None), disabled=None, disableTargetRollup=None, targetMatrix=ColumnMatrix(segments=[Segment(tags=[])], type=, include=[], exclude=['url', 'desc', , 'issue_d'], profileId=None), dataReadinessDuration=None, batchCoolDownPeriod=None, backfillGracePeriodDuration=None, config=DriftConfig(schemaVersion=None, params=None, metric=, type=, algorithm='hellinger', threshold=0.7, minBatchSize=1, baseline=TrailingWindowBaseline(datasetId=None, inheritSegment=None, type=, size=7, offset=None, exclusionRanges=None))), analyzedColumnCount=26)" + "text/plain": "MonitorDiagnosisReport(orgId='org-0', datasetId='model-0', analyzerId='frequent-items-drift-analyzer-bx6m80', interval='2024-04-13T00:00:00.000Z/2024-05-13T00:00:00.000Z', expectedBatchCount=0, diagnosticData=DiagnosticDataSummary(diagnosticSegment=Segment(tags=[]), diagnosticProfile=ProfileSummary(minRowName='desc', minRowCount=1517489, maxRowName='desc', maxRowCount=1517489), diagnosticBatches=BatchesSummary(minBatchName='desc', minBatchCount=30, maxBatchName='desc', maxBatchCount=30), analysisResults=AnalysisResultsSummary(results=ResultRecord(diagnosedColumnCount=26, batchCount=30), failures=FailureRecord(totalFailuresCount=0, maxFailuresCount=0, meanFailuresCount=0, byColumnCount=[], byTypeCount=[]), anomalies=AnomalyRecord(totalAnomalyCount=38, maxAnomalyCount=30, meanAnomalyCount=12, batchCount=30, byColumnCount=[NamedCount(name='issue_d', count=30), NamedCount(name='url', count=6), NamedCount(name='desc', count=2)], byColumnBatchCount=[NamedCount(name='addr_state', count=30), NamedCount(name='application_type', count=30), NamedCount(name='debt_settlement_flag', count=30), NamedCount(name='desc', count=3), NamedCount(name='disbursement_method', count=30), NamedCount(name='earliest_cr_line', count=30), NamedCount(name='emp_length', count=30), NamedCount(name='emp_title', count=30), NamedCount(name='grade', count=30), NamedCount(name='hardship_flag', count=30), NamedCount(name='home_ownership', count=30), NamedCount(name='initial_list_status', count=30), NamedCount(name='issue_d', count=30), NamedCount(name='last_credit_pull_d', count=30), NamedCount(name='last_pymnt_d', count=30), NamedCount(name='loan_status', count=30), NamedCount(name='next_pymnt_d', count=30), NamedCount(name='purpose', count=30), NamedCount(name='pymnt_plan', count=30), NamedCount(name='sub_grade', count=30), NamedCount(name='term', count=30), NamedCount(name='title', count=30), NamedCount(name='url', count=30), NamedCount(name='verification_status', count=30), NamedCount(name='verification_status_joint', count=30), NamedCount(name='zip_code', count=30)])), targetedColumnCount=27), qualityIssues=[QualityIssueRecord(name='analyzer_changed', description='Analyzer changed within the diagnostic interval', detectors=['stale_analysis', 'changing_discrete', 'low_drift_threshold', 'missing_baseline_batches', 'small_nonnull_batches'])], conditions=[ConditionRecord(columns=['desc', 'issue_d', 'url'], info=None, summary='many values are unique across batches', name='changing_discrete'), ConditionRecord(columns=['desc'], info=None, summary='less than 500 non-null records in 50% or more of the batches', name='small_nonnull_batches')], monitor=Monitor(metadata=Metadata(version=1, schemaVersion=1, updatedTimestamp=1711135588156, author='user_809f777d_3741_4991_8ced_42f09b883ac7', description=None), id='frequent-items-drift-monitor-bx6m80', displayName=None, tags=None, analyzerIds=['frequent-items-drift-analyzer-bx6m80'], schedule=ImmediateSchedule(type='immediate'), disabled=False, severity=3, mode=DigestMode(type='DIGEST', filter=None, creationTimeOffset=None, datasetTimestampOffset=None, groupBy=None), actions=[]), analyzer=Analyzer(metadata=Metadata(version=2, schemaVersion=1, updatedTimestamp=1715605098048, author='user_c9292ec40407f7b580f0a2c90745ebfba2b9e6ea81c848ef944d31e48a45f98', description=None), id='frequent-items-drift-analyzer-bx6m80', displayName=None, tags=None, schedule=FixedCadenceSchedule(type='fixed', cadence=, exclusionRanges=None), disabled=None, disableTargetRollup=None, targetMatrix=ColumnMatrix(segments=[Segment(tags=[])], type=, include=[], exclude=['url', , 'desc', 'issue_d'], profileId=None), dataReadinessDuration=None, batchCoolDownPeriod=None, backfillGracePeriodDuration=None, config=DriftConfig(schemaVersion=None, params=None, metric=, type=, algorithm='hellinger', threshold=0.7, minBatchSize=1, baseline=TrailingWindowBaseline(datasetId=None, inheritSegment=None, type=, size=7, offset=None, exclusionRanges=None))), analyzedColumnCount=26)" }, - "execution_count": 5, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -130,11 +130,11 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "metadata": { "ExecuteTime": { - "end_time": "2024-05-03T20:44:38.811834Z", - "start_time": "2024-05-03T20:44:38.798918Z" + "end_time": "2024-05-14T19:55:52.375039Z", + "start_time": "2024-05-14T19:55:52.361642Z" } }, "outputs": [ @@ -142,27 +142,26 @@ "name": "stdout", "output_type": "stream", "text": [ - "Diagnosis is for monitor \"frequent-items-drift-monitor-x2hr9z\" [frequent-items-drift-monitor-x2hr9z] in model-0 org-0, over interval 2024-04-02T00:00:00.000Z/2024-05-02T00:00:00.000Z.\n", - "Monitor has 1 notification actions ['email'].\n", + "Diagnosis is for monitor \"frequent-items-drift-monitor-bx6m80\" [frequent-items-drift-monitor-bx6m80] in model-0 org-0, over interval 2024-04-13T00:00:00.000Z/2024-05-13T00:00:00.000Z.\n", "\n", "Analyzer is drift configuration for frequent_items metric with TrailingWindow baseline.\n", - "Analyzer \"frequent-items-drift-analyzer-x2hr9z\" targets 27 columns and ran on 26 columns in the diagnosed segment.\n", + "Analyzer \"frequent-items-drift-analyzer-bx6m80\" targets 27 columns and ran on 26 columns in the diagnosed segment.\n", "\n", "\n", "Diagnostic segment is \"overall\".\n", "Diagnostic interval contains 30 batches.\n", "\n", - "Diagnostic interval rollup contains 1674392 rows for the diagnosed columns.\n", + "Diagnostic interval rollup contains 1517489 rows for the diagnosed columns.\n", "\n", "Analysis results summary:\n", "Found non-failed results for 26 columns and 30 batches.\n", - "Found 34 anomalies in 3 columns, with up to 100.0% (30) batches having anomalies per column and 36.7% (11.0) on average.\n", + "Found 38 anomalies in 3 columns, with up to 100.0% (30) batches having anomalies per column and 40.0% (12.0) on average.\n", "Columns with anomalies are:\n", - "| | 0 |\n", - "|---:|:----------------|\n", - "| 0 | ('issue_d', 30) |\n", - "| 1 | ('url', 3) |\n", - "| 2 | ('desc', 1) |\n", + "| | column | count |\n", + "|---:|:---------|--------:|\n", + "| 0 | issue_d | 30 |\n", + "| 1 | url | 6 |\n", + "| 2 | desc | 2 |\n", "\n", "No failures were detected.\n", "\n", @@ -174,12 +173,12 @@ "\t* Condition small_nonnull_batches (less than 500 non-null records in 50% or more of the batches) for 1 columns: ['desc']\n", "\n", "Anomalies for columns with these conditions:\n", - "| | 0 |\n", - "|:--------|----:|\n", - "| issue_d | 30 |\n", - "| url | 3 |\n", - "| desc | 1 |\n", - "Accounting for 34 anomalies out of 34\n" + "| | column | count |\n", + "|---:|:---------|--------:|\n", + "| 0 | issue_d | 30 |\n", + "| 1 | url | 6 |\n", + "| 2 | desc | 2 |\n", + "Accounting for 38 anomalies out of 38\n" ] } ], @@ -201,11 +200,11 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "metadata": { "ExecuteTime": { - "end_time": "2024-05-03T20:44:38.816760Z", - "start_time": "2024-05-03T20:44:38.813018Z" + "end_time": "2024-05-14T19:55:52.379621Z", + "start_time": "2024-05-14T19:55:52.376215Z" } }, "outputs": [], @@ -216,11 +215,11 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "metadata": { "ExecuteTime": { - "end_time": "2024-05-03T20:44:38.822615Z", - "start_time": "2024-05-03T20:44:38.817801Z" + "end_time": "2024-05-14T19:55:52.384784Z", + "start_time": "2024-05-14T19:55:52.380668Z" } }, "outputs": [ @@ -231,8 +230,8 @@ "{\n", " \"orgId\": \"org-0\",\n", " \"datasetId\": \"model-0\",\n", - " \"analyzerId\": \"frequent-items-drift-analyzer-x2hr9z\",\n", - " \"interval\": \"2024-04-02T00:00:00.000Z/2024-05-02T00:00:00.000Z\",\n", + " \"analyzerId\": \"frequent-items-drift-analyzer-bx6m80\",\n", + " \"interval\": \"2024-04-13T00:00:00.000Z/2024-05-13T00:00:00.000Z\",\n", " \"expectedBatchCount\": 0,\n", " \"diagnosticData\": {\n", " \"diagnosticSegment\": {\n", @@ -240,9 +239,9 @@ " },\n", " \"diagnosticProfile\": {\n", " \"minRowName\": \"desc\",\n", - " \"minRowCount\": 1674392,\n", + " \"minRowCount\": 1517489,\n", " \"maxRowName\": \"desc\",\n", - " \"maxRowCount\": 1674392\n", + " \"maxRowCount\": 1517489\n", " },\n", " \"diagnosticBatches\": {\n", " \"minBatchName\": \"desc\",\n", @@ -263,9 +262,9 @@ " \"byTypeCount\": []\n", " },\n", " \"anomalies\": {\n", - " \"totalAnomalyCount\": 34,\n", + " \"totalAnomalyCount\": 38,\n", " \"maxAnomalyCount\": 30,\n", - " \"meanAnomalyCount\": 11,\n", + " \"meanAnomalyCount\": 12,\n", " \"batchCount\": 30,\n", " \"byColumnCount\": [\n", " {\n", @@ -274,11 +273,11 @@ " },\n", " {\n", " \"name\": \"url\",\n", - " \"count\": 3\n", + " \"count\": 6\n", " },\n", " {\n", " \"name\": \"desc\",\n", - " \"count\": 1\n", + " \"count\": 2\n", " }\n", " ],\n", " \"byColumnBatchCount\": [\n", @@ -296,7 +295,7 @@ " },\n", " {\n", " \"name\": \"desc\",\n", - " \"count\": 2\n", + " \"count\": 3\n", " },\n", " {\n", " \"name\": \"disbursement_method\",\n", @@ -428,15 +427,15 @@ " \"metadata\": {\n", " \"version\": 1,\n", " \"schemaVersion\": 1,\n", - " \"updatedTimestamp\": 1705536890090,\n", + " \"updatedTimestamp\": 1711135588156,\n", " \"author\": \"user_809f777d_3741_4991_8ced_42f09b883ac7\",\n", " \"description\": null\n", " },\n", - " \"id\": \"frequent-items-drift-monitor-x2hr9z\",\n", + " \"id\": \"frequent-items-drift-monitor-bx6m80\",\n", " \"displayName\": null,\n", " \"tags\": null,\n", " \"analyzerIds\": [\n", - " \"frequent-items-drift-analyzer-x2hr9z\"\n", + " \"frequent-items-drift-analyzer-bx6m80\"\n", " ],\n", " \"schedule\": {\n", " \"type\": \"immediate\"\n", @@ -450,22 +449,17 @@ " \"datasetTimestampOffset\": null,\n", " \"groupBy\": null\n", " },\n", - " \"actions\": [\n", - " {\n", - " \"type\": \"global\",\n", - " \"target\": \"email\"\n", - " }\n", - " ]\n", + " \"actions\": []\n", " },\n", " \"analyzer\": {\n", " \"metadata\": {\n", - " \"version\": 5,\n", + " \"version\": 2,\n", " \"schemaVersion\": 1,\n", - " \"updatedTimestamp\": 1714764383143,\n", + " \"updatedTimestamp\": 1715605098048,\n", " \"author\": \"user_c9292ec40407f7b580f0a2c90745ebfba2b9e6ea81c848ef944d31e48a45f98\",\n", " \"description\": null\n", " },\n", - " \"id\": \"frequent-items-drift-analyzer-x2hr9z\",\n", + " \"id\": \"frequent-items-drift-analyzer-bx6m80\",\n", " \"displayName\": null,\n", " \"tags\": null,\n", " \"schedule\": {\n", @@ -487,8 +481,8 @@ " ],\n", " \"exclude\": [\n", " \"url\",\n", - " \"desc\",\n", " \"group:output\",\n", + " \"desc\",\n", " \"issue_d\"\n", " ],\n", " \"profileId\": null\n", @@ -538,11 +532,11 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "metadata": { "ExecuteTime": { - "end_time": "2024-05-03T20:44:38.832653Z", - "start_time": "2024-05-03T20:44:38.823912Z" + "end_time": "2024-05-14T19:55:52.392694Z", + "start_time": "2024-05-14T19:55:52.385763Z" } }, "outputs": [ @@ -580,11 +574,11 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "metadata": { "ExecuteTime": { - "end_time": "2024-05-03T20:44:38.835957Z", - "start_time": "2024-05-03T20:44:38.833756Z" + "end_time": "2024-05-14T19:55:52.395956Z", + "start_time": "2024-05-14T19:55:52.393579Z" } }, "outputs": [ @@ -603,11 +597,11 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 10, "metadata": { "ExecuteTime": { - "end_time": "2024-05-03T20:44:39.994817Z", - "start_time": "2024-05-03T20:44:38.836984Z" + "end_time": "2024-05-14T19:55:53.427942Z", + "start_time": "2024-05-14T19:55:52.396796Z" } }, "outputs": [ @@ -642,20 +636,20 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 11, "metadata": { "ExecuteTime": { - "end_time": "2024-05-03T20:44:40.011819Z", - "start_time": "2024-05-03T20:44:39.998108Z" + "end_time": "2024-05-14T19:55:53.449819Z", + "start_time": "2024-05-14T19:55:53.431999Z" } }, "outputs": [ { "data": { - "text/plain": " monitor_id \\\n0 frequent-items-drift-monitor-x2hr9z \n1 discrete-distribution-22ef37c9-monitor \n2 smoggy-chartreuse-owl-3387 \n3 frequent-items-drift-monitor-bx6m80 \n4 frequent-items-drift-monitor-mat0jo \n5 frequent-items-drift-monitor-01rbfl \n6 frequent-items-drift-monitor-0foigt \n7 frequent-items-drift-monitor-3c0hc2 \n8 frequent-items-drift-monitor-9gmtix \n9 elated-palegreen-jaguar-6432 \n10 inferred-data-type-fec5a735-monitor \n11 unique-ratio-b7b84aee-monitor \n12 missing-values-ratio-35881327-monitor \n13 numerical-drift-monitor-6oxi83 \n14 numerical-drift-monitor-8yugth \n15 continuous-distribution-956a280c-monitor \n16 dull-floralwhite-raven-5521 \n\n analyzer_id metric column_count \\\n0 frequent-items-drift-analyzer-x2hr9z frequent_items 3 \n1 discrete-distribution-22ef37c9 frequent_items 3 \n2 smoggy-chartreuse-owl-3387-analyzer frequent_items 3 \n3 frequent-items-drift-analyzer-bx6m80 frequent_items 3 \n4 frequent-items-drift-analyzer-mat0jo frequent_items 3 \n5 frequent-items-drift-analyzer-01rbfl frequent_items 3 \n6 frequent-items-drift-analyzer-0foigt frequent_items 3 \n7 frequent-items-drift-analyzer-3c0hc2 frequent_items 3 \n8 frequent-items-drift-analyzer-9gmtix frequent_items 3 \n9 elated-palegreen-jaguar-6432-analyzer histogram 9 \n10 inferred-data-type-fec5a735 inferred_data_type 1 \n11 unique-ratio-b7b84aee unique_est_ratio 69 \n12 missing-values-ratio-35881327 count_null_ratio 21 \n13 numerical-drift-analyzer-6oxi83 histogram 1 \n14 numerical-drift-analyzer-8yugth histogram 1 \n15 continuous-distribution-956a280c histogram 1 \n16 dull-floralwhite-raven-5521-analyzer count 2 \n\n segment_count anomaly_count max_anomaly_per_column \\\n0 1 34 30 \n1 1 34 30 \n2 1 34 30 \n3 1 34 30 \n4 1 34 30 \n5 1 34 30 \n6 1 34 30 \n7 1 34 30 \n8 1 34 30 \n9 1 75 19 \n10 1 14 14 \n11 1 104 4 \n12 1 27 3 \n13 1 2 2 \n14 1 2 2 \n15 1 2 2 \n16 1 3 2 \n\n min_anomaly_per_column avg_anomaly_per_column action_count \\\n0 1 11 1 \n1 1 11 0 \n2 1 11 0 \n3 1 11 0 \n4 1 11 2 \n5 1 11 1 \n6 1 11 0 \n7 1 11 1 \n8 1 11 1 \n9 2 8 0 \n10 14 14 2 \n11 1 1 0 \n12 1 1 0 \n13 2 2 0 \n14 2 2 0 \n15 2 2 0 \n16 1 1 0 \n\n action_targets \n0 [email] \n1 [] \n2 [] \n3 [] \n4 [email, slack] \n5 [email] \n6 [] \n7 [email] \n8 [email] \n9 [] \n10 [email, slack] \n11 [] \n12 [] \n13 [] \n14 [] \n15 [] \n16 [] ", - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
monitor_idanalyzer_idmetriccolumn_countsegment_countanomaly_countmax_anomaly_per_columnmin_anomaly_per_columnavg_anomaly_per_columnaction_countaction_targets
0frequent-items-drift-monitor-x2hr9zfrequent-items-drift-analyzer-x2hr9zfrequent_items3134301111[email]
1discrete-distribution-22ef37c9-monitordiscrete-distribution-22ef37c9frequent_items3134301110[]
2smoggy-chartreuse-owl-3387smoggy-chartreuse-owl-3387-analyzerfrequent_items3134301110[]
3frequent-items-drift-monitor-bx6m80frequent-items-drift-analyzer-bx6m80frequent_items3134301110[]
4frequent-items-drift-monitor-mat0jofrequent-items-drift-analyzer-mat0jofrequent_items3134301112[email, slack]
5frequent-items-drift-monitor-01rbflfrequent-items-drift-analyzer-01rbflfrequent_items3134301111[email]
6frequent-items-drift-monitor-0foigtfrequent-items-drift-analyzer-0foigtfrequent_items3134301110[]
7frequent-items-drift-monitor-3c0hc2frequent-items-drift-analyzer-3c0hc2frequent_items3134301111[email]
8frequent-items-drift-monitor-9gmtixfrequent-items-drift-analyzer-9gmtixfrequent_items3134301111[email]
9elated-palegreen-jaguar-6432elated-palegreen-jaguar-6432-analyzerhistogram917519280[]
10inferred-data-type-fec5a735-monitorinferred-data-type-fec5a735inferred_data_type11141414142[email, slack]
11unique-ratio-b7b84aee-monitorunique-ratio-b7b84aeeunique_est_ratio6911044110[]
12missing-values-ratio-35881327-monitormissing-values-ratio-35881327count_null_ratio211273110[]
13numerical-drift-monitor-6oxi83numerical-drift-analyzer-6oxi83histogram1122220[]
14numerical-drift-monitor-8yugthnumerical-drift-analyzer-8yugthhistogram1122220[]
15continuous-distribution-956a280c-monitorcontinuous-distribution-956a280chistogram1122220[]
16dull-floralwhite-raven-5521dull-floralwhite-raven-5521-analyzercount2132110[]
\n
" + "text/plain": " monitor_id \\\n0 frequent-items-drift-monitor-bx6m80 \n1 discrete-distribution-22ef37c9-monitor \n2 smoggy-chartreuse-owl-3387 \n3 frequent-items-drift-monitor-mat0jo \n4 frequent-items-drift-monitor-01rbfl \n5 frequent-items-drift-monitor-0foigt \n6 frequent-items-drift-monitor-3c0hc2 \n7 frequent-items-drift-monitor-9gmtix \n8 elated-palegreen-jaguar-6432 \n9 frequent-items-drift-monitor-x2hr9z \n10 inferred-data-type-fec5a735-monitor \n11 continuous-distribution-956a280c-monitor \n12 numerical-drift-monitor-6oxi83 \n13 numerical-drift-monitor-8yugth \n14 missing-values-ratio-35881327-monitor \n15 unique-ratio-b7b84aee-monitor \n16 dull-floralwhite-raven-5521 \n\n analyzer_id metric column_count \\\n0 frequent-items-drift-analyzer-bx6m80 frequent_items 3 \n1 discrete-distribution-22ef37c9 frequent_items 3 \n2 smoggy-chartreuse-owl-3387-analyzer frequent_items 3 \n3 frequent-items-drift-analyzer-mat0jo frequent_items 3 \n4 frequent-items-drift-analyzer-01rbfl frequent_items 3 \n5 frequent-items-drift-analyzer-0foigt frequent_items 3 \n6 frequent-items-drift-analyzer-3c0hc2 frequent_items 3 \n7 frequent-items-drift-analyzer-9gmtix frequent_items 3 \n8 elated-palegreen-jaguar-6432-analyzer histogram 8 \n9 frequent-items-drift-analyzer-x2hr9z frequent_items 3 \n10 inferred-data-type-fec5a735 inferred_data_type 1 \n11 continuous-distribution-956a280c histogram 1 \n12 numerical-drift-analyzer-6oxi83 histogram 1 \n13 numerical-drift-analyzer-8yugth histogram 1 \n14 missing-values-ratio-35881327 count_null_ratio 21 \n15 unique-ratio-b7b84aee unique_est_ratio 68 \n16 dull-floralwhite-raven-5521-analyzer count 2 \n\n segment_count anomaly_count max_anomaly_per_column \\\n0 1 38 30 \n1 1 38 30 \n2 1 38 30 \n3 1 38 30 \n4 1 38 30 \n5 1 38 30 \n6 1 38 30 \n7 1 38 30 \n8 1 81 25 \n9 1 22 19 \n10 1 18 18 \n11 1 8 8 \n12 1 8 8 \n13 1 8 8 \n14 1 30 4 \n15 1 103 3 \n16 1 2 1 \n\n min_anomaly_per_column avg_anomaly_per_column action_count \\\n0 2 12 0 \n1 2 12 0 \n2 2 12 0 \n3 2 12 2 \n4 2 12 1 \n5 2 12 0 \n6 2 12 1 \n7 2 12 1 \n8 2 10 0 \n9 1 7 1 \n10 18 18 2 \n11 8 8 0 \n12 8 8 0 \n13 8 8 0 \n14 1 1 0 \n15 1 1 0 \n16 1 1 0 \n\n action_targets \n0 [] \n1 [] \n2 [] \n3 [email, slack] \n4 [email] \n5 [] \n6 [email] \n7 [email] \n8 [] \n9 [email] \n10 [email, slack] \n11 [] \n12 [] \n13 [] \n14 [] \n15 [] \n16 [] ", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
monitor_idanalyzer_idmetriccolumn_countsegment_countanomaly_countmax_anomaly_per_columnmin_anomaly_per_columnavg_anomaly_per_columnaction_countaction_targets
0frequent-items-drift-monitor-bx6m80frequent-items-drift-analyzer-bx6m80frequent_items3138302120[]
1discrete-distribution-22ef37c9-monitordiscrete-distribution-22ef37c9frequent_items3138302120[]
2smoggy-chartreuse-owl-3387smoggy-chartreuse-owl-3387-analyzerfrequent_items3138302120[]
3frequent-items-drift-monitor-mat0jofrequent-items-drift-analyzer-mat0jofrequent_items3138302122[email, slack]
4frequent-items-drift-monitor-01rbflfrequent-items-drift-analyzer-01rbflfrequent_items3138302121[email]
5frequent-items-drift-monitor-0foigtfrequent-items-drift-analyzer-0foigtfrequent_items3138302120[]
6frequent-items-drift-monitor-3c0hc2frequent-items-drift-analyzer-3c0hc2frequent_items3138302121[email]
7frequent-items-drift-monitor-9gmtixfrequent-items-drift-analyzer-9gmtixfrequent_items3138302121[email]
8elated-palegreen-jaguar-6432elated-palegreen-jaguar-6432-analyzerhistogram8181252100[]
9frequent-items-drift-monitor-x2hr9zfrequent-items-drift-analyzer-x2hr9zfrequent_items312219171[email]
10inferred-data-type-fec5a735-monitorinferred-data-type-fec5a735inferred_data_type11181818182[email, slack]
11continuous-distribution-956a280c-monitorcontinuous-distribution-956a280chistogram1188880[]
12numerical-drift-monitor-6oxi83numerical-drift-analyzer-6oxi83histogram1188880[]
13numerical-drift-monitor-8yugthnumerical-drift-analyzer-8yugthhistogram1188880[]
14missing-values-ratio-35881327-monitormissing-values-ratio-35881327count_null_ratio211304110[]
15unique-ratio-b7b84aee-monitorunique-ratio-b7b84aeeunique_est_ratio6811033110[]
16dull-floralwhite-raven-5521dull-floralwhite-raven-5521-analyzercount2121110[]
\n
" }, - "execution_count": 12, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -678,11 +672,11 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 12, "metadata": { "ExecuteTime": { - "end_time": "2024-05-03T20:44:45.451258Z", - "start_time": "2024-05-03T20:44:40.012742Z" + "end_time": "2024-05-14T19:55:58.356198Z", + "start_time": "2024-05-14T19:55:53.450765Z" } }, "outputs": [ @@ -691,7 +685,7 @@ "output_type": "stream", "text": [ "discrete-distribution-22ef37c9-monitor\n", - "Diagnosis is for monitor \"discrete-distribution-22ef37c9-monitor\" [discrete-distribution-22ef37c9-monitor] in model-0 org-0, over interval 2024-04-02T00:00:00.000Z/2024-05-02T00:00:00.000Z.\n", + "Diagnosis is for monitor \"discrete-distribution-22ef37c9-monitor\" [discrete-distribution-22ef37c9-monitor] in model-0 org-0, over interval 2024-04-13T00:00:00.000Z/2024-05-13T00:00:00.000Z.\n", "\n", "Analyzer is drift configuration for frequent_items metric with TrailingWindow baseline.\n", "Analyzer \"discrete-distribution-22ef37c9\" targets 30 columns and ran on 26 columns in the diagnosed segment.\n", @@ -700,17 +694,17 @@ "Diagnostic segment is \"overall\".\n", "Diagnostic interval contains 30 batches.\n", "\n", - "Diagnostic interval rollup contains 1674392 rows for the diagnosed columns.\n", + "Diagnostic interval rollup contains 1517489 rows for the diagnosed columns.\n", "\n", "Analysis results summary:\n", "Found non-failed results for 26 columns and 30 batches.\n", - "Found 34 anomalies in 3 columns, with up to 100.0% (30) batches having anomalies per column and 36.7% (11.0) on average.\n", + "Found 38 anomalies in 3 columns, with up to 100.0% (30) batches having anomalies per column and 40.0% (12.0) on average.\n", "Columns with anomalies are:\n", - "| | 0 |\n", - "|---:|:----------------|\n", - "| 0 | ('issue_d', 30) |\n", - "| 1 | ('url', 3) |\n", - "| 2 | ('desc', 1) |\n", + "| | column | count |\n", + "|---:|:---------|--------:|\n", + "| 0 | issue_d | 30 |\n", + "| 1 | url | 6 |\n", + "| 2 | desc | 2 |\n", "\n", "No failures were detected.\n", "\n", @@ -722,12 +716,12 @@ "\t* Condition small_nonnull_batches (less than 500 non-null records in 50% or more of the batches) for 1 columns: ['desc']\n", "\n", "Anomalies for columns with these conditions:\n", - "| | 0 |\n", - "|:--------|----:|\n", - "| issue_d | 30 |\n", - "| url | 3 |\n", - "| desc | 1 |\n", - "Accounting for 34 anomalies out of 34\n" + "| | column | count |\n", + "|---:|:---------|--------:|\n", + "| 0 | issue_d | 30 |\n", + "| 1 | url | 6 |\n", + "| 2 | desc | 2 |\n", + "Accounting for 38 anomalies out of 38\n" ] } ], @@ -749,21 +743,21 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 13, "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-05-03T20:44:45.461470Z", - "start_time": "2024-05-03T20:44:45.452512Z" + "end_time": "2024-05-14T19:55:58.365346Z", + "start_time": "2024-05-14T19:55:58.357308Z" } }, "outputs": [ { "data": { - "text/plain": " monitor_id analyzer_id \\\n0 frequent-items-drift-monitor-x2hr9z frequent-items-drift-analyzer-x2hr9z \n1 frequent-items-drift-monitor-mat0jo frequent-items-drift-analyzer-mat0jo \n2 frequent-items-drift-monitor-01rbfl frequent-items-drift-analyzer-01rbfl \n3 frequent-items-drift-monitor-3c0hc2 frequent-items-drift-analyzer-3c0hc2 \n4 frequent-items-drift-monitor-9gmtix frequent-items-drift-analyzer-9gmtix \n5 inferred-data-type-fec5a735-monitor inferred-data-type-fec5a735 \n\n metric column_count segment_count anomaly_count \\\n0 frequent_items 3 1 34 \n1 frequent_items 3 1 34 \n2 frequent_items 3 1 34 \n3 frequent_items 3 1 34 \n4 frequent_items 3 1 34 \n5 inferred_data_type 1 1 14 \n\n max_anomaly_per_column min_anomaly_per_column avg_anomaly_per_column \\\n0 30 1 11 \n1 30 1 11 \n2 30 1 11 \n3 30 1 11 \n4 30 1 11 \n5 14 14 14 \n\n action_count action_targets \n0 1 [email] \n1 2 [email, slack] \n2 1 [email] \n3 1 [email] \n4 1 [email] \n5 2 [email, slack] ", - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
monitor_idanalyzer_idmetriccolumn_countsegment_countanomaly_countmax_anomaly_per_columnmin_anomaly_per_columnavg_anomaly_per_columnaction_countaction_targets
0frequent-items-drift-monitor-x2hr9zfrequent-items-drift-analyzer-x2hr9zfrequent_items3134301111[email]
1frequent-items-drift-monitor-mat0jofrequent-items-drift-analyzer-mat0jofrequent_items3134301112[email, slack]
2frequent-items-drift-monitor-01rbflfrequent-items-drift-analyzer-01rbflfrequent_items3134301111[email]
3frequent-items-drift-monitor-3c0hc2frequent-items-drift-analyzer-3c0hc2frequent_items3134301111[email]
4frequent-items-drift-monitor-9gmtixfrequent-items-drift-analyzer-9gmtixfrequent_items3134301111[email]
5inferred-data-type-fec5a735-monitorinferred-data-type-fec5a735inferred_data_type11141414142[email, slack]
\n
" + "text/plain": " monitor_id analyzer_id \\\n0 frequent-items-drift-monitor-mat0jo frequent-items-drift-analyzer-mat0jo \n1 frequent-items-drift-monitor-01rbfl frequent-items-drift-analyzer-01rbfl \n2 frequent-items-drift-monitor-3c0hc2 frequent-items-drift-analyzer-3c0hc2 \n3 frequent-items-drift-monitor-9gmtix frequent-items-drift-analyzer-9gmtix \n4 frequent-items-drift-monitor-x2hr9z frequent-items-drift-analyzer-x2hr9z \n5 inferred-data-type-fec5a735-monitor inferred-data-type-fec5a735 \n\n metric column_count segment_count anomaly_count \\\n0 frequent_items 3 1 38 \n1 frequent_items 3 1 38 \n2 frequent_items 3 1 38 \n3 frequent_items 3 1 38 \n4 frequent_items 3 1 22 \n5 inferred_data_type 1 1 18 \n\n max_anomaly_per_column min_anomaly_per_column avg_anomaly_per_column \\\n0 30 2 12 \n1 30 2 12 \n2 30 2 12 \n3 30 2 12 \n4 19 1 7 \n5 18 18 18 \n\n action_count action_targets \n0 2 [email, slack] \n1 1 [email] \n2 1 [email] \n3 1 [email] \n4 1 [email] \n5 2 [email, slack] ", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
monitor_idanalyzer_idmetriccolumn_countsegment_countanomaly_countmax_anomaly_per_columnmin_anomaly_per_columnavg_anomaly_per_columnaction_countaction_targets
0frequent-items-drift-monitor-mat0jofrequent-items-drift-analyzer-mat0jofrequent_items3138302122[email, slack]
1frequent-items-drift-monitor-01rbflfrequent-items-drift-analyzer-01rbflfrequent_items3138302121[email]
2frequent-items-drift-monitor-3c0hc2frequent-items-drift-analyzer-3c0hc2frequent_items3138302121[email]
3frequent-items-drift-monitor-9gmtixfrequent-items-drift-analyzer-9gmtixfrequent_items3138302121[email]
4frequent-items-drift-monitor-x2hr9zfrequent-items-drift-analyzer-x2hr9zfrequent_items312219171[email]
5inferred-data-type-fec5a735-monitorinferred-data-type-fec5a735inferred_data_type11181818182[email, slack]
\n
" }, - "execution_count": 14, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -789,7 +783,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.16" + "version": "3.11.9" } }, "nbformat": 4, diff --git a/whylabs_toolkit/monitor/diagnoser/models/diagnosis_report.py b/whylabs_toolkit/monitor/diagnoser/models/diagnosis_report.py index 20a6d24..5b7be55 100644 --- a/whylabs_toolkit/monitor/diagnoser/models/diagnosis_report.py +++ b/whylabs_toolkit/monitor/diagnoser/models/diagnosis_report.py @@ -95,7 +95,7 @@ class FailureRecord(BaseModel): byTypeCount: List[NamedCount] def describe(self) -> str: - failures = pd.DataFrame([c.to_tuple() for c in self.byColumnCount]) + failures = pd.DataFrame([c.to_tuple() for c in self.byColumnCount], columns=['column', 'count']) failure_types = [t.name for t in self.byTypeCount] if len(failures) == 0: return "No failures were detected." @@ -116,7 +116,7 @@ class AnomalyRecord(BaseModel): byColumnBatchCount: List[NamedCount] def describe(self) -> str: - counts = pd.Series([c.to_tuple() for c in self.byColumnCount]) + counts = pd.DataFrame([c.to_tuple() for c in self.byColumnCount], columns=['column', 'count']) max_count = int(self.maxAnomalyCount) max_pct = max_count * 100 / self.batchCount mean_count = float(self.meanAnomalyCount) @@ -208,8 +208,9 @@ def describe_conditions(self) -> str: idx, values = zip(*count_tuples) count_by_col = pd.Series(values, idx) cols_with_count = filter_by_index(cols.tolist(), count_by_col).sort_values(ascending=False) - cols_with_count.rename("anomalies") - text += describe_truncated_table(cols_with_count) + cols_with_count.index.name = 'column' + cols_with_count.name = 'count' + text += describe_truncated_table(pd.DataFrame(cols_with_count).reset_index()) text += f"\nAccounting for {cols_with_count.sum()} anomalies out of " f"{count_by_col.sum()}\n" return text From d5c5b8dfb189d0b3973a26d73aa8df32873c9a74 Mon Sep 17 00:00:00 2001 From: Christine Draper Date: Tue, 14 May 2024 16:21:46 -0400 Subject: [PATCH 2/2] Lint fixes --- .../monitor/diagnoser/models/diagnosis_report.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/whylabs_toolkit/monitor/diagnoser/models/diagnosis_report.py b/whylabs_toolkit/monitor/diagnoser/models/diagnosis_report.py index 5b7be55..2042bb2 100644 --- a/whylabs_toolkit/monitor/diagnoser/models/diagnosis_report.py +++ b/whylabs_toolkit/monitor/diagnoser/models/diagnosis_report.py @@ -95,7 +95,7 @@ class FailureRecord(BaseModel): byTypeCount: List[NamedCount] def describe(self) -> str: - failures = pd.DataFrame([c.to_tuple() for c in self.byColumnCount], columns=['column', 'count']) + failures = pd.DataFrame([c.to_tuple() for c in self.byColumnCount], columns=["column", "count"]) failure_types = [t.name for t in self.byTypeCount] if len(failures) == 0: return "No failures were detected." @@ -116,7 +116,7 @@ class AnomalyRecord(BaseModel): byColumnBatchCount: List[NamedCount] def describe(self) -> str: - counts = pd.DataFrame([c.to_tuple() for c in self.byColumnCount], columns=['column', 'count']) + counts = pd.DataFrame([c.to_tuple() for c in self.byColumnCount], columns=["column", "count"]) max_count = int(self.maxAnomalyCount) max_pct = max_count * 100 / self.batchCount mean_count = float(self.meanAnomalyCount) @@ -208,8 +208,8 @@ def describe_conditions(self) -> str: idx, values = zip(*count_tuples) count_by_col = pd.Series(values, idx) cols_with_count = filter_by_index(cols.tolist(), count_by_col).sort_values(ascending=False) - cols_with_count.index.name = 'column' - cols_with_count.name = 'count' + cols_with_count.index.name = "column" + cols_with_count.name = "count" text += describe_truncated_table(pd.DataFrame(cols_with_count).reset_index()) text += f"\nAccounting for {cols_with_count.sum()} anomalies out of " f"{count_by_col.sum()}\n"