Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

expfmt: Add UTF-8 syntax support in text_parse.go #670

Merged
merged 17 commits into from
Aug 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
152 changes: 131 additions & 21 deletions expfmt/text_parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@ import (
"math"
"strconv"
"strings"
"unicode/utf8"

dto "github.com/prometheus/client_model/go"

"google.golang.org/protobuf/proto"

"github.com/prometheus/common/model"
Expand Down Expand Up @@ -60,6 +60,7 @@ type TextParser struct {
currentMF *dto.MetricFamily
currentMetric *dto.Metric
currentLabelPair *dto.LabelPair
currentLabelPairs []*dto.LabelPair // Temporarily stores label pairs while parsing a metric line.

// The remaining member variables are only used for summaries/histograms.
currentLabels map[string]string // All labels including '__name__' but excluding 'quantile'/'le'
Expand All @@ -74,6 +75,7 @@ type TextParser struct {
// count and sum of that summary/histogram.
currentIsSummaryCount, currentIsSummarySum bool
currentIsHistogramCount, currentIsHistogramSum bool
currentMetricIsInsideBraces bool
}

// TextToMetricFamilies reads 'in' as the simple and flat text-based exchange
Expand Down Expand Up @@ -137,12 +139,14 @@ func (p *TextParser) reset(in io.Reader) {
}
p.currentQuantile = math.NaN()
p.currentBucket = math.NaN()
p.currentMF = nil
}

// startOfLine represents the state where the next byte read from p.buf is the
// start of a line (or whitespace leading up to it).
func (p *TextParser) startOfLine() stateFn {
p.lineCount++
p.currentMetricIsInsideBraces = false
if p.skipBlankTab(); p.err != nil {
// This is the only place that we expect to see io.EOF,
// which is not an error but the signal that we are done.
Expand All @@ -158,6 +162,9 @@ func (p *TextParser) startOfLine() stateFn {
return p.startComment
case '\n':
return p.startOfLine // Empty line, start the next one.
case '{':
p.currentMetricIsInsideBraces = true
return p.readingLabels
}
return p.readingMetricName
}
Expand Down Expand Up @@ -275,6 +282,8 @@ func (p *TextParser) startLabelName() stateFn {
return nil // Unexpected end of input.
}
if p.currentByte == '}' {
p.currentMetric.Label = append(p.currentMetric.Label, p.currentLabelPairs...)
p.currentLabelPairs = nil
if p.skipBlankTab(); p.err != nil {
return nil // Unexpected end of input.
}
Expand All @@ -287,6 +296,38 @@ func (p *TextParser) startLabelName() stateFn {
p.parseError(fmt.Sprintf("invalid label name for metric %q", p.currentMF.GetName()))
return nil
}
if p.skipBlankTabIfCurrentBlankTab(); p.err != nil {
return nil // Unexpected end of input.
}
if p.currentByte != '=' {
if p.currentMetricIsInsideBraces {
if p.currentMF != nil && p.currentMF.GetName() != p.currentToken.String() {
p.parseError(fmt.Sprintf("multiple metric names %s %s", p.currentMF.GetName(), p.currentToken.String()))
return nil
}
switch p.currentByte {
case ',':
p.setOrCreateCurrentMF()
p.currentMetric = &dto.Metric{}
return p.startLabelName
case '}':
p.setOrCreateCurrentMF()
p.currentMetric = &dto.Metric{}
p.currentMetric.Label = append(p.currentMetric.Label, p.currentLabelPairs...)
p.currentLabelPairs = nil
if p.skipBlankTab(); p.err != nil {
return nil // Unexpected end of input.
}
return p.readingValue
default:
p.parseError(fmt.Sprintf("unexpected end of metric name %q", p.currentByte))
return nil
}
}
p.parseError(fmt.Sprintf("expected '=' after label name, found %q", p.currentByte))
p.currentLabelPairs = nil
return nil
}
p.currentLabelPair = &dto.LabelPair{Name: proto.String(p.currentToken.String())}
if p.currentLabelPair.GetName() == string(model.MetricNameLabel) {
p.parseError(fmt.Sprintf("label name %q is reserved", model.MetricNameLabel))
Expand All @@ -296,23 +337,17 @@ func (p *TextParser) startLabelName() stateFn {
// labels to 'real' labels.
if !(p.currentMF.GetType() == dto.MetricType_SUMMARY && p.currentLabelPair.GetName() == model.QuantileLabel) &&
!(p.currentMF.GetType() == dto.MetricType_HISTOGRAM && p.currentLabelPair.GetName() == model.BucketLabel) {
p.currentMetric.Label = append(p.currentMetric.Label, p.currentLabelPair)
}
if p.skipBlankTabIfCurrentBlankTab(); p.err != nil {
return nil // Unexpected end of input.
}
if p.currentByte != '=' {
p.parseError(fmt.Sprintf("expected '=' after label name, found %q", p.currentByte))
return nil
p.currentLabelPairs = append(p.currentLabelPairs, p.currentLabelPair)
}
// Check for duplicate label names.
labels := make(map[string]struct{})
for _, l := range p.currentMetric.Label {
for _, l := range p.currentLabelPairs {
lName := l.GetName()
if _, exists := labels[lName]; !exists {
labels[lName] = struct{}{}
} else {
p.parseError(fmt.Sprintf("duplicate label names for metric %q", p.currentMF.GetName()))
p.currentLabelPairs = nil
return nil
}
}
Expand Down Expand Up @@ -345,6 +380,7 @@ func (p *TextParser) startLabelValue() stateFn {
if p.currentQuantile, p.err = parseFloat(p.currentLabelPair.GetValue()); p.err != nil {
// Create a more helpful error message.
p.parseError(fmt.Sprintf("expected float as value for 'quantile' label, got %q", p.currentLabelPair.GetValue()))
p.currentLabelPairs = nil
return nil
}
} else {
Expand All @@ -371,12 +407,19 @@ func (p *TextParser) startLabelValue() stateFn {
return p.startLabelName

case '}':
if p.currentMF == nil {
p.parseError("invalid metric name")
return nil
}
p.currentMetric.Label = append(p.currentMetric.Label, p.currentLabelPairs...)
p.currentLabelPairs = nil
if p.skipBlankTab(); p.err != nil {
return nil // Unexpected end of input.
}
return p.readingValue
default:
p.parseError(fmt.Sprintf("unexpected end of label value %q", p.currentLabelPair.GetValue()))
p.currentLabelPairs = nil
return nil
}
}
Expand Down Expand Up @@ -585,6 +628,8 @@ func (p *TextParser) readTokenUntilNewline(recognizeEscapeSequence bool) {
p.currentToken.WriteByte(p.currentByte)
case 'n':
p.currentToken.WriteByte('\n')
case '"':
p.currentToken.WriteByte('"')
default:
p.parseError(fmt.Sprintf("invalid escape sequence '\\%c'", p.currentByte))
return
Expand All @@ -610,13 +655,45 @@ func (p *TextParser) readTokenUntilNewline(recognizeEscapeSequence bool) {
// but not into p.currentToken.
func (p *TextParser) readTokenAsMetricName() {
p.currentToken.Reset()
// A UTF-8 metric name must be quoted and may have escaped characters.
quoted := false
escaped := false
if !isValidMetricNameStart(p.currentByte) {
return
}
for {
p.currentToken.WriteByte(p.currentByte)
for p.err == nil {
if escaped {
switch p.currentByte {
case '\\':
p.currentToken.WriteByte(p.currentByte)
case 'n':
p.currentToken.WriteByte('\n')
case '"':
p.currentToken.WriteByte('"')
default:
p.parseError(fmt.Sprintf("invalid escape sequence '\\%c'", p.currentByte))
return
}
escaped = false
} else {
switch p.currentByte {
case '"':
quoted = !quoted
if !quoted {
p.currentByte, p.err = p.buf.ReadByte()
return
}
case '\n':
p.parseError(fmt.Sprintf("metric name %q contains unescaped new-line", p.currentToken.String()))
return
case '\\':
escaped = true
default:
p.currentToken.WriteByte(p.currentByte)
}
}
p.currentByte, p.err = p.buf.ReadByte()
if p.err != nil || !isValidMetricNameContinuation(p.currentByte) {
if !isValidMetricNameContinuation(p.currentByte, quoted) || (!quoted && p.currentByte == ' ') {
return
}
}
Expand All @@ -628,13 +705,45 @@ func (p *TextParser) readTokenAsMetricName() {
// but not into p.currentToken.
func (p *TextParser) readTokenAsLabelName() {
p.currentToken.Reset()
// A UTF-8 label name must be quoted and may have escaped characters.
quoted := false
escaped := false
if !isValidLabelNameStart(p.currentByte) {
return
}
for {
p.currentToken.WriteByte(p.currentByte)
for p.err == nil {
if escaped {
switch p.currentByte {
case '\\':
p.currentToken.WriteByte(p.currentByte)
case 'n':
p.currentToken.WriteByte('\n')
case '"':
p.currentToken.WriteByte('"')
default:
p.parseError(fmt.Sprintf("invalid escape sequence '\\%c'", p.currentByte))
return
}
escaped = false
} else {
switch p.currentByte {
case '"':
quoted = !quoted
if !quoted {
p.currentByte, p.err = p.buf.ReadByte()
return
}
case '\n':
p.parseError(fmt.Sprintf("label name %q contains unescaped new-line", p.currentToken.String()))
return
case '\\':
escaped = true
default:
p.currentToken.WriteByte(p.currentByte)
}
}
p.currentByte, p.err = p.buf.ReadByte()
if p.err != nil || !isValidLabelNameContinuation(p.currentByte) {
if !isValidLabelNameContinuation(p.currentByte, quoted) || (!quoted && p.currentByte == '=') {
return
}
}
Expand All @@ -660,6 +769,7 @@ func (p *TextParser) readTokenAsLabelValue() {
p.currentToken.WriteByte('\n')
default:
p.parseError(fmt.Sprintf("invalid escape sequence '\\%c'", p.currentByte))
p.currentLabelPairs = nil
return
}
escaped = false
Expand Down Expand Up @@ -718,19 +828,19 @@ func (p *TextParser) setOrCreateCurrentMF() {
}

func isValidLabelNameStart(b byte) bool {
return (b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z') || b == '_'
return (b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z') || b == '_' || b == '"'
}

func isValidLabelNameContinuation(b byte) bool {
return isValidLabelNameStart(b) || (b >= '0' && b <= '9')
func isValidLabelNameContinuation(b byte, quoted bool) bool {
return isValidLabelNameStart(b) || (b >= '0' && b <= '9') || (quoted && utf8.ValidString(string(b)))
}

func isValidMetricNameStart(b byte) bool {
return isValidLabelNameStart(b) || b == ':'
}

func isValidMetricNameContinuation(b byte) bool {
return isValidLabelNameContinuation(b) || b == ':'
func isValidMetricNameContinuation(b byte, quoted bool) bool {
return isValidLabelNameContinuation(b, quoted) || b == ':'
}

func isBlankOrTab(b byte) bool {
Expand Down
Loading