Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: add unit testcases for files in Text format. #2274

Merged
merged 10 commits into from
Mar 12, 2024
Original file line number Diff line number Diff line change
Expand Up @@ -19,40 +19,42 @@ package sidecarmetricscollector
import (
"os"
"path/filepath"
"reflect"
"testing"
"time"

"github.com/google/go-cmp/cmp"
commonv1beta1 "github.com/kubeflow/katib/pkg/apis/controller/common/v1beta1"
v1beta1 "github.com/kubeflow/katib/pkg/apis/manager/v1beta1"
"github.com/kubeflow/katib/pkg/controller.v1beta1/consts"
)

var testJsonDataPath = filepath.Join("testdata", "JSON")
var (
testJsonDataPath = filepath.Join("testdata", "JSON")
testTextDataPath = filepath.Join("testdata", "TEXT")
Electronic-Waste marked this conversation as resolved.
Show resolved Hide resolved
)

func TestCollectObservationLog(t *testing.T) {

if err := generateTestFiles(); err != nil {
if err := generateJSONTestFiles(); err != nil {
t.Fatal(err)
}
defer os.RemoveAll(filepath.Dir(testJsonDataPath))
if err := generateTEXTTestFiles(); err != nil {
t.Fatal(err)
}
defer os.RemoveAll(filepath.Dir(testTextDataPath))

// TODO (tenzen-y): We should add tests for logs in TEXT format.
// Ref: https://github.com/kubeflow/katib/issues/1756
testCases := []struct {
description string
filePath string
metrics []string
filters []string
fileFormat commonv1beta1.FileFormat
err bool
expected *v1beta1.ObservationLog
testCases := map[string]struct {
Electronic-Waste marked this conversation as resolved.
Show resolved Hide resolved
filePath string
metrics []string
filters []string
fileFormat commonv1beta1.FileFormat
err bool
Electronic-Waste marked this conversation as resolved.
Show resolved Hide resolved
expected *v1beta1.ObservationLog
}{
{
description: "Positive case for logs in JSON format",
filePath: filepath.Join(testJsonDataPath, "good.json"),
metrics: []string{"acc", "loss"},
fileFormat: commonv1beta1.JsonFormat,
"Positive case for logs in JSON format": {
filePath: filepath.Join(testJsonDataPath, "good.json"),
metrics: []string{"acc", "loss"},
fileFormat: commonv1beta1.JsonFormat,
expected: &v1beta1.ObservationLog{
MetricLogs: []*v1beta1.MetricLog{
{
Expand Down Expand Up @@ -93,28 +95,124 @@ func TestCollectObservationLog(t *testing.T) {
},
},
},
{
description: "Invalid file name",
filePath: "invalid",
err: true,
"Positive case for logs in TEXT format": {
filePath: filepath.Join(testTextDataPath, "good.log"),
metrics: []string{"accuracy", "loss"},
filters: []string{"{metricName: ([\\w|-]+), metricValue: ((-?\\d+)(\\.\\d+)?)}"},
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As we mentioned in the issue, our motivation is to verify if this filter works fine to avoid bugs like this: #1754

We should add unit test for our File Metrics Collector to verify log parsing.
That will help to avoid issues similar to: #1754

So, could you add more than valid and invalid cases?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I'll do it later.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Some new test cases similar to bugs in #1754 have been added to the test file.

fileFormat: commonv1beta1.TextFormat,
expected: &v1beta1.ObservationLog{
MetricLogs: []*v1beta1.MetricLog{
{
TimeStamp: "2024-03-04T17:55:08Z",
Metric: &v1beta1.Metric{
Name: "accuracy",
Value: "0.8078",
},
},
{
TimeStamp: "2024-03-04T17:55:08Z",
Metric: &v1beta1.Metric{
Name: "loss",
Value: "0.5183",
},
},
{
TimeStamp: "2024-03-04T17:55:08Z",
Metric: &v1beta1.Metric{
Name: "accuracy",
Value: "0.6752",
},
},
{
TimeStamp: "2024-03-04T17:55:08Z",
Metric: &v1beta1.Metric{
Name: "loss",
Value: "0.3634",
},
},
{
TimeStamp: "2024-03-04T17:55:08Z",
Metric: &v1beta1.Metric{
Name: "accuracy",
Value: "100",
},
},
{
TimeStamp: "2024-03-04T17:55:08Z",
Metric: &v1beta1.Metric{
Name: "accuracy",
Value: "888.333",
},
},
{
TimeStamp: "2024-03-04T17:55:08Z",
Metric: &v1beta1.Metric{
Name: "accuracy",
Value: "-0.4759",
},
},
{
TimeStamp: time.Time{}.UTC().Format(time.RFC3339),
Metric: &v1beta1.Metric{
Name: "loss",
Value: "0.8671",
},
},
},
},
},
{
description: "Invalid file format",
filePath: filepath.Join(testJsonDataPath, "good.json"),
fileFormat: "invalid",
err: true,
"Invalid case for logs in TEXT format": {
filePath: filepath.Join(testTextDataPath, "invalid-value.log"),
filters: []string{"{metricName: ([\\w|-]+), metricValue: ((-?\\d+)(\\.\\d+)?)}"},
metrics: []string{"accuracy", "loss"},
fileFormat: commonv1beta1.TextFormat,
expected: &v1beta1.ObservationLog{
MetricLogs: []*v1beta1.MetricLog{
{
TimeStamp: time.Time{}.UTC().Format(time.RFC3339),
Metric: &v1beta1.Metric{
Name: "accuracy",
Value: consts.UnavailableMetricValue,
},
},
},
},
},
{
description: "Invalid formatted file for logs in JSON format",
filePath: filepath.Join(testJsonDataPath, "invalid-format.json"),
fileFormat: commonv1beta1.JsonFormat,
err: true,
"Invalid file name": {
filePath: "invalid",
err: true,
},
{
description: "Invalid timestamp for logs in JSON format",
filePath: filepath.Join(testJsonDataPath, "invalid-timestamp.json"),
fileFormat: commonv1beta1.JsonFormat,
metrics: []string{"acc", "loss"},
"Invalid file format": {
filePath: filepath.Join(testTextDataPath, "good.log"),
Electronic-Waste marked this conversation as resolved.
Show resolved Hide resolved
fileFormat: "invalid",
err: true,
},
"Invalid formatted file for logs in JSON format": {
filePath: filepath.Join(testJsonDataPath, "invalid-format.json"),
fileFormat: commonv1beta1.JsonFormat,
err: true,
},
"Invalid formatted file for logs in TEXT format": {
filePath: filepath.Join(testTextDataPath, "invalid-format.log"),
filters: []string{"{metricName: ([\\w|-]+), metricValue: ((-?\\d+)(\\.\\d+)?)}"},
andreyvelich marked this conversation as resolved.
Show resolved Hide resolved
metrics: []string{"accuracy", "loss"},
fileFormat: commonv1beta1.TextFormat,
expected: &v1beta1.ObservationLog{
MetricLogs: []*v1beta1.MetricLog{
{
TimeStamp: time.Time{}.UTC().Format(time.RFC3339),
Metric: &v1beta1.Metric{
Name: "accuracy",
Value: consts.UnavailableMetricValue,
},
},
},
},
},
"Invalid timestamp for logs in JSON format": {
filePath: filepath.Join(testJsonDataPath, "invalid-timestamp.json"),
fileFormat: commonv1beta1.JsonFormat,
metrics: []string{"acc", "loss"},
expected: &v1beta1.ObservationLog{
MetricLogs: []*v1beta1.MetricLog{
{
Expand All @@ -134,11 +232,34 @@ func TestCollectObservationLog(t *testing.T) {
},
},
},
{
description: "Missing objective metric in training logs",
filePath: filepath.Join(testJsonDataPath, "missing-objective-metric.json"),
fileFormat: commonv1beta1.JsonFormat,
metrics: []string{"acc", "loss"},
"Invalid timestamp for logs in TEXT format": {
filePath: filepath.Join(testTextDataPath, "invalid-timestamp.log"),
metrics: []string{"accuracy", "loss"},
filters: []string{"{metricName: ([\\w|-]+), metricValue: ((-?\\d+)(\\.\\d+)?)}"},
fileFormat: commonv1beta1.TextFormat,
expected: &v1beta1.ObservationLog{
MetricLogs: []*v1beta1.MetricLog{
{
TimeStamp: "2024-03-04T17:55:08Z",
Metric: &v1beta1.Metric{
Name: "accuracy",
Value: "0.6752",
},
},
{
TimeStamp: time.Time{}.UTC().Format(time.RFC3339),
Metric: &v1beta1.Metric{
Name: "loss",
Value: "0.3634",
},
},
},
},
},
"Missing objective metric in JSON training logs": {
filePath: filepath.Join(testJsonDataPath, "missing-objective-metric.json"),
fileFormat: commonv1beta1.JsonFormat,
metrics: []string{"acc", "loss"},
expected: &v1beta1.ObservationLog{
MetricLogs: []*v1beta1.MetricLog{
{
Expand All @@ -151,23 +272,39 @@ func TestCollectObservationLog(t *testing.T) {
},
},
},
"Missing objective metric in TEXT training logs": {
filePath: filepath.Join(testTextDataPath, "missing-objective-metric.log"),
fileFormat: commonv1beta1.TextFormat,
metrics: []string{"accuracy", "loss"},
expected: &v1beta1.ObservationLog{
MetricLogs: []*v1beta1.MetricLog{
{
TimeStamp: time.Time{}.UTC().Format(time.RFC3339),
Metric: &v1beta1.Metric{
Name: "accuracy",
Value: consts.UnavailableMetricValue,
},
},
},
},
},
}

for _, test := range testCases {
t.Run(test.description, func(t *testing.T) {
for name, test := range testCases {
t.Run(name, func(t *testing.T) {
actual, err := CollectObservationLog(test.filePath, test.metrics, test.filters, test.fileFormat)
if (err != nil) != test.err {
t.Errorf("\nGOT: \n%v\nWANT: %v\n", err, test.err)
} else {
if !reflect.DeepEqual(actual, test.expected) {
t.Errorf("Expected %v\n got %v", test.expected, actual)
if diff := cmp.Diff(test.expected, actual); diff != "" {
t.Errorf("Unexpected parsed result (-want,+got):\n%s", diff)
}
}
})
}
}

func generateTestFiles() error {
func generateJSONTestFiles() error {
if _, err := os.Stat(testJsonDataPath); err != nil {
if err = os.MkdirAll(testJsonDataPath, 0700); err != nil {
return err
Expand Down Expand Up @@ -215,3 +352,57 @@ func generateTestFiles() error {

return nil
}

func generateTEXTTestFiles() error {
if _, err := os.Stat(testTextDataPath); err != nil {
if err = os.MkdirAll(testTextDataPath, 0700); err != nil {
return err
}
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we move directory creation outside of these functions and create it before we invoke them ?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Both are okay. But keeping them in generateXXXTestFiles will make the code look more tidy.

Copy link
Member

@andreyvelich andreyvelich Mar 11, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's true, but since we always run these functions sequentially why do we need to have duplicate code to create directories in each of them ?
Maybe we could have another function called: generateTestDirectory which will create testdata dir.
WDYT @Electronic-Waste @tenzen-y ?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@andreyvelich That sounds reasonable. Having separate function wouldn't be wroth it.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I meant that consolidating 2 generators into 1 would be better.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, that sounds reasonable. I'll modify it soon.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@andreyvelich @tenzen-y I also integrate os.RemoveAll operations into the func deleteTestDirs(). plz cc👀


testData := []struct {
fileName string
data string
}{
{
fileName: "good.log",
data: `2024-03-04T17:55:08Z INFO {metricName: accuracy, metricValue: 0.8078};{metricName: loss, metricValue: 0.5183}
2024-03-04T17:55:08Z INFO {metricName: accuracy, metricValue: 0.6752}
2024-03-04T17:55:08Z INFO {metricName: loss, metricValue: 0.3634}
2024-03-04T17:55:08Z INFO {metricName: accuracy, metricValue: 100}
2024-03-04T17:55:08Z INFO {metricName: accuracy, metricValue: 888.333}
2024-03-04T17:55:08Z INFO {metricName: accuracy, metricValue: -0.4759}
{metricName: loss, metricValue: 0.8671}`,
},
{
fileName: "invalid-value.log",
data: `2024-03-04T17:55:08Z INFO {metricName: accuracy, metricValue: .333}
2024-03-04T17:55:08Z INFO {metricName: accuracy, metricValue: -.333}
2024-03-04T17:55:08Z INFO {metricName: accuracy, metricValue: - 345.333}
2024-03-04T17:55:08Z INFO {metricName: accuracy, metricValue: 888.}`,
},
{
fileName: "invalid-format.log",
data: `2024-03-04T17:55:08Z INFO {metricName: accuracy, metricValue: 0.6752
2024-03-04T17:55:08Z INFO {metricName: loss, metricValue: 0.3634}`,
},
{
fileName: "invalid-timestamp.log",
data: `2024-03-04T17:55:08Z INFO {metricName: accuracy, metricValue: 0.6752}
invalid INFO {metricName: loss, metricValue: 0.3634}`,
},
{
fileName: "missing-objective-metric.log",
data: `2024-03-04T17:55:08Z INFO {metricName: loss, metricValue: 0.3634}
2024-03-04T17:55:08Z INFO {metricName: loss, metricValue: 0.8671}`,
},
}

for _, td := range testData {
filePath := filepath.Join(testTextDataPath, td.fileName)
if err := os.WriteFile(filePath, []byte(td.data), 0600); err != nil {
return err
}
}
return nil
}
Loading