[pkg/stanza] Rename tokenize package to split
djaglowski committed Sep 8, 2023
1 parent c26d3f9 commit c4de85e
Showing 19 changed files with 142 additions and 115 deletions.
27 changes: 27 additions & 0 deletions .chloggen/pkg-stanza-rm-tokenize.yaml
@@ -0,0 +1,27 @@
# Use this changelog template to create an entry for release notes.

# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: breaking

# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
component: pkg/stanza

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Rename "tokenize" package to "split"

# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
issues: [26540]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext:

# If your change doesn't affect end users or the exported elements of any package,
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
# Optional: The change log or logs in which this entry should be included.
# e.g. '[user]' or '[user, api]'
# Include 'user' if the change is relevant to end users.
# Include 'api' if there is a change to a library API.
# Default: '[user]'
change_logs: [api]
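
Because this entry lands in the api changelog, downstream Go code that imports the renamed package must update both the import path and every qualified identifier. A minimal before/after sketch, assuming a consumer that only uses NewMultilineConfig (both names appear verbatim in this diff; nothing about the type itself changes):

package example

// Before this commit the consumer read:
//
//	import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/tokenize"
//
//	var multi = tokenize.NewMultilineConfig()
//
// After this commit the same code becomes:

import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/split"

var multi = split.NewMultilineConfig()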
36 changes: 18 additions & 18 deletions pkg/stanza/fileconsumer/config.go
@@ -20,7 +20,7 @@ import (
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/fileconsumer/matcher"
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator"
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator/helper"
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/tokenize"
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/split"
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/trim"
)

@@ -53,7 +53,7 @@ func NewConfig() *Config {
IncludeFileNameResolved: false,
IncludeFilePathResolved: false,
PollInterval: 200 * time.Millisecond,
Multiline: tokenize.NewMultilineConfig(),
Multiline: split.NewMultilineConfig(),
Encoding: defaultEncoding,
StartAt: "end",
FingerprintSize: fingerprint.DefaultSize,
@@ -66,22 +66,22 @@ func NewConfig() *Config {
// Config is the configuration of a file input operator
type Config struct {
matcher.Criteria `mapstructure:",squash"`
IncludeFileName bool `mapstructure:"include_file_name,omitempty"`
IncludeFilePath bool `mapstructure:"include_file_path,omitempty"`
IncludeFileNameResolved bool `mapstructure:"include_file_name_resolved,omitempty"`
IncludeFilePathResolved bool `mapstructure:"include_file_path_resolved,omitempty"`
PollInterval time.Duration `mapstructure:"poll_interval,omitempty"`
StartAt string `mapstructure:"start_at,omitempty"`
FingerprintSize helper.ByteSize `mapstructure:"fingerprint_size,omitempty"`
MaxLogSize helper.ByteSize `mapstructure:"max_log_size,omitempty"`
MaxConcurrentFiles int `mapstructure:"max_concurrent_files,omitempty"`
MaxBatches int `mapstructure:"max_batches,omitempty"`
DeleteAfterRead bool `mapstructure:"delete_after_read,omitempty"`
Multiline tokenize.MultilineConfig `mapstructure:"multiline,omitempty"`
TrimConfig trim.Config `mapstructure:",squash,omitempty"`
Encoding string `mapstructure:"encoding,omitempty"`
FlushPeriod time.Duration `mapstructure:"force_flush_period,omitempty"`
Header *HeaderConfig `mapstructure:"header,omitempty"`
IncludeFileName bool `mapstructure:"include_file_name,omitempty"`
IncludeFilePath bool `mapstructure:"include_file_path,omitempty"`
IncludeFileNameResolved bool `mapstructure:"include_file_name_resolved,omitempty"`
IncludeFilePathResolved bool `mapstructure:"include_file_path_resolved,omitempty"`
PollInterval time.Duration `mapstructure:"poll_interval,omitempty"`
StartAt string `mapstructure:"start_at,omitempty"`
FingerprintSize helper.ByteSize `mapstructure:"fingerprint_size,omitempty"`
MaxLogSize helper.ByteSize `mapstructure:"max_log_size,omitempty"`
MaxConcurrentFiles int `mapstructure:"max_concurrent_files,omitempty"`
MaxBatches int `mapstructure:"max_batches,omitempty"`
DeleteAfterRead bool `mapstructure:"delete_after_read,omitempty"`
Multiline split.MultilineConfig `mapstructure:"multiline,omitempty"`
TrimConfig trim.Config `mapstructure:",squash,omitempty"`
Encoding string `mapstructure:"encoding,omitempty"`
FlushPeriod time.Duration `mapstructure:"force_flush_period,omitempty"`
Header *HeaderConfig `mapstructure:"header,omitempty"`
}

type HeaderConfig struct {
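
To show the renamed type at a call site, here is a minimal sketch of building a file-consumer config with a multiline start pattern. The constructor and field come straight from the diff above; the pattern value is illustrative only:

package example

import (
	"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/fileconsumer"
	"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/split"
)

func newMultilineFileConfig() *fileconsumer.Config {
	cfg := fileconsumer.NewConfig()
	// Multiline is now a split.MultilineConfig rather than a
	// tokenize.MultilineConfig; its field set is unchanged by this commit.
	cfg.Multiline = split.MultilineConfig{
		LineStartPattern: `^\d{4}-\d{2}-\d{2}`, // illustrative timestamp anchor
	}
	return cfg
}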
24 changes: 12 additions & 12 deletions pkg/stanza/fileconsumer/config_test.go
@@ -16,8 +16,8 @@ import (
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator/helper"
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator/operatortest"
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator/parser/regex"
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/split"
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/testutil"
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/tokenize"
)

func TestUnmarshal(t *testing.T) {
@@ -280,7 +280,7 @@ func TestUnmarshal(t *testing.T) {
Name: "multiline_line_start_string",
Expect: func() *mockOperatorConfig {
cfg := NewConfig()
cfg.Multiline = tokenize.MultilineConfig{
cfg.Multiline = split.MultilineConfig{
LineStartPattern: "Start",
}
return newMockOperatorConfig(cfg)
@@ -290,7 +290,7 @@ func TestUnmarshal(t *testing.T) {
Name: "multiline_line_start_special",
Expect: func() *mockOperatorConfig {
cfg := NewConfig()
cfg.Multiline = tokenize.MultilineConfig{
cfg.Multiline = split.MultilineConfig{
LineStartPattern: "%",
}
return newMockOperatorConfig(cfg)
@@ -300,7 +300,7 @@ func TestUnmarshal(t *testing.T) {
Name: "multiline_line_end_string",
Expect: func() *mockOperatorConfig {
cfg := NewConfig()
cfg.Multiline = tokenize.MultilineConfig{
cfg.Multiline = split.MultilineConfig{
LineEndPattern: "Start",
}
return newMockOperatorConfig(cfg)
@@ -310,7 +310,7 @@ func TestUnmarshal(t *testing.T) {
Name: "multiline_line_end_special",
Expect: func() *mockOperatorConfig {
cfg := NewConfig()
cfg.Multiline = tokenize.MultilineConfig{
cfg.Multiline = split.MultilineConfig{
LineEndPattern: "%",
}
return newMockOperatorConfig(cfg)
@@ -452,7 +452,7 @@ func TestBuild(t *testing.T) {
{
"MultilineConfiguredStartAndEndPatterns",
func(f *Config) {
f.Multiline = tokenize.MultilineConfig{
f.Multiline = split.MultilineConfig{
LineEndPattern: "Exists",
LineStartPattern: "Exists",
}
@@ -463,7 +463,7 @@ func TestBuild(t *testing.T) {
{
"MultilineConfiguredStartPattern",
func(f *Config) {
f.Multiline = tokenize.MultilineConfig{
f.Multiline = split.MultilineConfig{
LineStartPattern: "START.*",
}
},
@@ -473,7 +473,7 @@ func TestBuild(t *testing.T) {
{
"MultilineConfiguredEndPattern",
func(f *Config) {
f.Multiline = tokenize.MultilineConfig{
f.Multiline = split.MultilineConfig{
LineEndPattern: "END.*",
}
},
@@ -491,7 +491,7 @@ func TestBuild(t *testing.T) {
{
"LineStartAndEnd",
func(f *Config) {
f.Multiline = tokenize.MultilineConfig{
f.Multiline = split.MultilineConfig{
LineStartPattern: ".*",
LineEndPattern: ".*",
}
@@ -502,15 +502,15 @@ func TestBuild(t *testing.T) {
{
"NoLineStartOrEnd",
func(f *Config) {
f.Multiline = tokenize.MultilineConfig{}
f.Multiline = split.MultilineConfig{}
},
require.NoError,
func(t *testing.T, f *Manager) {},
},
{
"InvalidLineStartRegex",
func(f *Config) {
f.Multiline = tokenize.MultilineConfig{
f.Multiline = split.MultilineConfig{
LineStartPattern: "(",
}
},
@@ -520,7 +520,7 @@ func TestBuild(t *testing.T) {
{
"InvalidLineEndRegex",
func(f *Config) {
f.Multiline = tokenize.MultilineConfig{
f.Multiline = split.MultilineConfig{
LineEndPattern: "(",
}
},
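
The InvalidLineStartRegex and InvalidLineEndRegex cases above feed the pattern "(" through the build path. The assertions are collapsed in this view, but the pattern presumably fails when compiled with Go's standard regexp package — a sketch of that step, as an assumption about the config's internals rather than code from this diff:

package example

import "regexp"

// validatePattern mimics the compilation the invalid-regex cases exercise:
// an unbalanced "(" makes regexp.Compile return a parse error.
func validatePattern(p string) error {
	_, err := regexp.Compile(p)
	return err
}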
4 changes: 2 additions & 2 deletions pkg/stanza/fileconsumer/file_test.go
@@ -24,8 +24,8 @@ import (

"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/fileconsumer/matcher"
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator/helper"
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/split"
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/testutil"
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/tokenize"
)

func TestCleanStop(t *testing.T) {
@@ -547,7 +547,7 @@ func TestNoNewline(t *testing.T) {
tempDir := t.TempDir()
cfg := NewConfig().includeDir(tempDir)
cfg.StartAt = "beginning"
cfg.Multiline = tokenize.NewMultilineConfig()
cfg.Multiline = split.NewMultilineConfig()
cfg.FlushPeriod = time.Nanosecond
operator, emitCalls := buildTestManager(t, cfg)

4 changes: 2 additions & 2 deletions pkg/stanza/fileconsumer/internal/header/config.go
@@ -15,7 +15,7 @@ import (

"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator"
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/pipeline"
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/tokenize"
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/split"
)

type Config struct {
@@ -69,7 +69,7 @@ func NewConfig(matchRegex string, metadataOperators []operator.Config, enc encod
return nil, fmt.Errorf("failed to compile `pattern`: %w", err)
}

splitFunc, err := tokenize.NewlineSplitFunc(enc, false, func(b []byte) []byte {
splitFunc, err := split.NewlineSplitFunc(enc, false, func(b []byte) []byte {
return bytes.Trim(b, "\r\n")
})
if err != nil {
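
This call is the only behavioral surface the header package touches here: split.NewlineSplitFunc builds a token-splitting function for the given encoding, trimming CR/LF from each token. A minimal usage sketch, assuming the returned value is a standard bufio.SplitFunc and that the boolean argument is a flush-at-EOF flag (both are assumptions; only the call shape above is confirmed by the diff):

package example

import (
	"bufio"
	"bytes"
	"fmt"
	"strings"

	"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/split"
	"golang.org/x/text/encoding/unicode"
)

func printLines(input string) error {
	// Same construction as header/config.go above.
	splitFunc, err := split.NewlineSplitFunc(unicode.UTF8, false, func(b []byte) []byte {
		return bytes.Trim(b, "\r\n")
	})
	if err != nil {
		return err
	}
	scanner := bufio.NewScanner(strings.NewReader(input))
	scanner.Split(splitFunc)
	for scanner.Scan() {
		fmt.Println(scanner.Text()) // one token per line of input
	}
	return scanner.Err()
}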
6 changes: 3 additions & 3 deletions pkg/stanza/fileconsumer/internal/splitter/multiline.go
@@ -10,12 +10,12 @@ import (
"golang.org/x/text/encoding"

"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/flush"
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/tokenize"
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/split"
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/trim"
)

type multilineFactory struct {
multilineCfg tokenize.MultilineConfig
multilineCfg split.MultilineConfig
encoding encoding.Encoding
maxLogSize int
trimFunc trim.Func
@@ -25,7 +25,7 @@ type multilineFactory struct {
var _ Factory = (*multilineFactory)(nil)

func NewMultilineFactory(
multilineCfg tokenize.MultilineConfig,
multilineCfg split.MultilineConfig,
encoding encoding.Encoding,
maxLogSize int,
trimFunc trim.Func,
8 changes: 4 additions & 4 deletions pkg/stanza/fileconsumer/internal/splitter/multiline_test.go
@@ -11,30 +11,30 @@ import (
"golang.org/x/text/encoding"
"golang.org/x/text/encoding/unicode"

"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/tokenize"
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/split"
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/trim"
)

func TestMultilineBuild(t *testing.T) {
tests := []struct {
name string
multilineCfg tokenize.MultilineConfig
multilineCfg split.MultilineConfig
encoding encoding.Encoding
maxLogSize int
flushPeriod time.Duration
wantErr bool
}{
{
name: "default configuration",
multilineCfg: tokenize.NewMultilineConfig(),
multilineCfg: split.NewMultilineConfig(),
encoding: unicode.UTF8,
maxLogSize: 1024,
flushPeriod: 100 * time.Millisecond,
wantErr: false,
},
{
name: "Multiline error",
multilineCfg: tokenize.MultilineConfig{
multilineCfg: split.MultilineConfig{
LineStartPattern: "START",
LineEndPattern: "END",
},
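
The "Multiline error" case pairs LineStartPattern with LineEndPattern; its wantErr assertion is collapsed in this view, but the name suggests the factory rejects the combination. For reference, the conflicting configuration looks like this (presumed invalid, not confirmed by the visible diff):

package example

import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/split"

// Presumed invalid: the "Multiline error" test case above sets both
// patterns at once, which the splitter factory is expected to reject.
var conflicting = split.MultilineConfig{
	LineStartPattern: "START",
	LineEndPattern:   "END",
}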
14 changes: 7 additions & 7 deletions pkg/stanza/fileconsumer/reader_test.go
@@ -17,14 +17,14 @@ import (
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/fileconsumer/internal/splitter"
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator"
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator/parser/regex"
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/split"
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/testutil"
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/tokenize"
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/trim"
)

func TestPersistFlusher(t *testing.T) {
flushPeriod := 100 * time.Millisecond
f, emitChan := testReaderFactory(t, tokenize.NewMultilineConfig(), defaultMaxLogSize, flushPeriod)
f, emitChan := testReaderFactory(t, split.NewMultilineConfig(), defaultMaxLogSize, flushPeriod)

temp := openTemp(t, t.TempDir())
fp, err := f.newFingerprint(temp)
@@ -110,7 +110,7 @@ func TestTokenization(t *testing.T) {

for _, tc := range testCases {
t.Run(tc.testName, func(t *testing.T) {
f, emitChan := testReaderFactory(t, tokenize.NewMultilineConfig(), defaultMaxLogSize, defaultFlushPeriod)
f, emitChan := testReaderFactory(t, split.NewMultilineConfig(), defaultMaxLogSize, defaultFlushPeriod)

temp := openTemp(t, t.TempDir())
_, err := temp.Write(tc.fileContent)
@@ -140,7 +140,7 @@ func TestTokenizationTooLong(t *testing.T) {
[]byte("aaa"),
}

f, emitChan := testReaderFactory(t, tokenize.NewMultilineConfig(), 10, defaultFlushPeriod)
f, emitChan := testReaderFactory(t, split.NewMultilineConfig(), 10, defaultFlushPeriod)

temp := openTemp(t, t.TempDir())
_, err := temp.Write(fileContent)
@@ -170,7 +170,7 @@ func TestTokenizationTooLongWithLineStartPattern(t *testing.T) {
[]byte("2023-01-01 2"),
}

mCfg := tokenize.NewMultilineConfig()
mCfg := split.NewMultilineConfig()
mCfg.LineStartPattern = `\d+-\d+-\d+`
f, emitChan := testReaderFactory(t, mCfg, 15, defaultFlushPeriod)

@@ -195,7 +195,7 @@ func TestTokenizationTooLongWithLineStartPattern(t *testing.T) {
func TestHeaderFingerprintIncluded(t *testing.T) {
fileContent := []byte("#header-line\naaa\n")

f, _ := testReaderFactory(t, tokenize.NewMultilineConfig(), 10, defaultFlushPeriod)
f, _ := testReaderFactory(t, split.NewMultilineConfig(), 10, defaultFlushPeriod)

regexConf := regex.NewConfig()
regexConf.Regex = "^#(?P<header>.*)"
@@ -223,7 +223,7 @@ func TestHeaderFingerprintIncluded(t *testing.T) {
require.Equal(t, []byte("#header-line\naaa\n"), r.Fingerprint.FirstBytes)
}

func testReaderFactory(t *testing.T, mCfg tokenize.MultilineConfig, maxLogSize int, flushPeriod time.Duration) (*readerFactory, chan *emitParams) {
func testReaderFactory(t *testing.T, mCfg split.MultilineConfig, maxLogSize int, flushPeriod time.Duration) (*readerFactory, chan *emitParams) {
emitChan := make(chan *emitParams, 100)
enc, err := decode.LookupEncoding(defaultEncoding)
trimFunc := trim.Whitespace
(The remaining 11 changed files in this commit are not rendered in this view.)
