Skip to content

Commit

Permalink
Fix log-manager on Azure (#2254)
Browse files Browse the repository at this point in the history
  • Loading branch information
vomba authored Sep 24, 2024
1 parent 5ff8499 commit e5d2b97
Show file tree
Hide file tree
Showing 11 changed files with 67 additions and 50 deletions.
7 changes: 7 additions & 0 deletions config/providers/azure/sc-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,10 @@ harbor:
persistence:
type: objectStorage
disableRedirect: false

# Azure provider defaults for the log-manager compaction settings.
fluentd:
logManager:
compaction:
# azcopy is used by az CLI for downloading files
# Memory buffer size in GB (decimals accepted) for Azure copy operations.
azureCopyBufferGB: 0.3
# Maximum number of concurrent download requests for Azure copy operations.
azureCopyConcurrency: 8
2 changes: 1 addition & 1 deletion config/sc-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1086,7 +1086,7 @@ fluentd:
memory: 50Mi
limits:
cpu: 200m
memory: 200Mi
memory: 400Mi

tolerations: []
nodeSelector: {}
Expand Down
6 changes: 6 additions & 0 deletions config/schemas/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4978,6 +4978,12 @@ properties:
type: object
additionalProperties: false
properties:
# Tuning knobs for Azure copy operations performed by log-manager compaction.
azureCopyBufferGB:
  type: number
  description: Configure the memory buffer size in GB (accepts decimals) for Azure copy operations.
azureCopyConcurrency:
  # A request count is a whole, positive number; reject values such as 2.5 or 0
  # at validation time instead of passing them on to the copy tool.
  type: integer
  minimum: 1
  description: Configure the maximum number of concurrent download requests for Azure copy operations.
volume:
title: Log Manager Compaction Volume
description: Configure log-manager compaction volume.
Expand Down
2 changes: 1 addition & 1 deletion helmfile.d/charts/log-manager/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@ apiVersion: v2
name: log-manager
description: log manager for Compliant Kubernetes
type: application
appVersion: 0.3.0
appVersion: 0.3.1
version: 0.1.0
49 changes: 26 additions & 23 deletions helmfile.d/charts/log-manager/files/compaction.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@

set -euo pipefail
if [ "$STORAGE_SERVICE" = "azure" ]; then
# Azure Blob configuration
# Azure Blob configuration
: "${AZURE_STORAGE_CONNECTION_STRING:?Missing AZURE_STORAGE_CONNECTION_STRING}"
: "${AZURE_CONTAINER_NAME:?Missing AZURE_CONTAINER_NAME}"
: "${AZURE_PREFIX:?Missing AZURE_PREFIX}"
else
# S3 configuration
# S3 configuration
: "${S3_CONFIG:?Missing S3_CONFIG}"
: "${S3_BUCKET:?Missing S3_BUCKET}"
: "${S3_PREFIX:?Missing S3_PREFIX}"
Expand Down Expand Up @@ -49,54 +49,58 @@ s3_get_chunks() {
S3_PATH="$1"
CHUNK_DIR="$2"

s3cmd --config "$S3_CONFIG" get -r "s3://$S3_BUCKET/$S3_PREFIX/$S3_PATH" "$CHUNK_DIR" > /dev/null
s3cmd --config "$S3_CONFIG" get -r "s3://$S3_BUCKET/$S3_PREFIX/$S3_PATH" "$CHUNK_DIR" >/dev/null
}

s3_put_chunk() {
S3_PATH="$1"
CHUNK_FILE="$2"

s3cmd --config "$S3_CONFIG" put --no-preserve "$CHUNK_FILE" "s3://$S3_BUCKET/$S3_PREFIX/$S3_PATH/" > /dev/null
s3cmd --config "$S3_CONFIG" put --no-preserve "$CHUNK_FILE" "s3://$S3_BUCKET/$S3_PREFIX/$S3_PATH/" >/dev/null
}

s3_rm_chunks() {
CHUNK_LIST="$1"

xargs -n1000 s3cmd --config "$S3_CONFIG" rm < "$CHUNK_LIST" > /dev/null
xargs -n1000 s3cmd --config "$S3_CONFIG" rm <"$CHUNK_LIST" >/dev/null
}

# Define functions for Azure operations
azure_list_days() {
az storage blob directory list --container-name "$AZURE_CONTAINER_NAME" --directory-path "$AZURE_PREFIX" --prefix "$AZURE_PREFIX/" --connection-string "$AZURE_STORAGE_CONNECTION_STRING" --output tsv | awk '{print $1}'
az storage fs file list --file-system "$AZURE_CONTAINER_NAME" --path "$AZURE_PREFIX" --connection-string "$AZURE_STORAGE_CONNECTION_STRING" --output tsv | awk '{print $9}' | sed "s#$AZURE_PREFIX/##" | sed 's/\/.*$//' | uniq
}

azure_list_indices() {
AZURE_PATH="$1"
az storage blob directory list --container-name "$AZURE_CONTAINER_NAME" --directory-path "$AZURE_PREFIX" --prefix "${AZURE_PREFIX}/${AZURE_PATH}/" --connection-string "$AZURE_STORAGE_CONNECTION_STRING" --output tsv | awk '{print $1}'
AZURE_PATH="$1"

az storage fs file list --file-system "$AZURE_CONTAINER_NAME" --path "${AZURE_PREFIX}/${AZURE_PATH}/" --connection-string "$AZURE_STORAGE_CONNECTION_STRING" --output tsv | awk '{print $9}' | sed "s#${AZURE_PREFIX}/${AZURE_PATH}/##" | sed 's/\/.*$//' | uniq
}

azure_list_chunks() {
AZURE_PATH="$1"
az storage blob list --container-name "$AZURE_CONTAINER_NAME" --prefix "${AZURE_PREFIX}/${AZURE_PATH}/" --connection-string "$AZURE_STORAGE_CONNECTION_STRING" --output tsv | grep '\.gz\|\.zst' | awk '{print $1}'
AZURE_PATH="$1"

az storage fs file list --file-system "$AZURE_CONTAINER_NAME" --path "${AZURE_PREFIX}/${AZURE_PATH}/" --connection-string "$AZURE_STORAGE_CONNECTION_STRING" --output tsv | grep '\.gz\|\.zst' | awk '{print $9}' | sed "s#${AZURE_PREFIX}/${AZURE_PATH}/##"
}

azure_get_chunks() {
AZURE_PATH="$1"
CHUNK_DIR="$2"
az storage blob download-batch -d "$CHUNK_DIR" --pattern '*.gz' --pattern '*.zst' --source "${AZURE_CONTAINER_NAME}/${AZURE_PREFIX}/${AZURE_PATH}" --connection-string "$AZURE_STORAGE_CONNECTION_STRING" > /dev/null
AZURE_PATH="$1"
CHUNK_DIR="$2"

az storage fs directory download --destination-path "$CHUNK_DIR" --file-system "${AZURE_CONTAINER_NAME}" --source-path "${AZURE_PREFIX}/${AZURE_PATH}/" --connection-string "$AZURE_STORAGE_CONNECTION_STRING" --recursive >/dev/null
}

azure_put_chunk() {
AZURE_PATH="$1"
CHUNK_FILE="$2"
az storage blob upload --file "$CHUNK_FILE" --container-name "$AZURE_CONTAINER_NAME" --name "${AZURE_PREFIX}/${AZURE_PATH}/$(basename "$CHUNK_FILE")" --connection-string "$AZURE_STORAGE_CONNECTION_STRING" > /dev/null
AZURE_PATH="$1"
CHUNK_FILE="$2"

az storage blob upload --file "$CHUNK_FILE" --container-name "$AZURE_CONTAINER_NAME" --name "${AZURE_PREFIX}/${AZURE_PATH}/$(basename "$CHUNK_FILE")" --connection-string "$AZURE_STORAGE_CONNECTION_STRING" >/dev/null
}

azure_rm_chunks() {
CHUNK_LIST="$1"
while IFS= read -r line; do
az storage blob delete --container-name "$AZURE_CONTAINER_NAME" --name "${AZURE_PREFIX}/${line}" --connection-string "$AZURE_STORAGE_CONNECTION_STRING" > /dev/null
done < "$CHUNK_LIST"
CHUNK_LIST="$1"
while IFS= read -r line; do
az storage blob delete --container-name "$AZURE_CONTAINER_NAME" --name "${AZURE_PREFIX}/${line}" --delete-snapshots "include" --connection-string "$AZURE_STORAGE_CONNECTION_STRING" >/dev/null
done <"$CHUNK_LIST"
}

merge_chunks() {
Expand Down Expand Up @@ -135,9 +139,9 @@ merge_chunks() {
fi

if [ "$STORAGE_SERVICE" = "azure" ]; then
echo "azure://${AZURE_CONTAINER_NAME}/${AZURE_PREFIX}/${DAY}/${FILE/$LM_TMP\//}" >> "$TMPFILE.idx"
echo "${DAY}/${FILE/$LM_TMP\//}" >>"$TMPFILE.idx"
else
echo "s3://$S3_BUCKET/$S3_PREFIX/$DAY/${FILE/$LM_TMP\//}" >> "$TMPFILE.idx"
echo "s3://$S3_BUCKET/$S3_PREFIX/$DAY/${FILE/$LM_TMP\//}" >>"$TMPFILE.idx"
fi

zstd --rm -c -d "$FILE"
Expand Down Expand Up @@ -168,7 +172,6 @@ merge_chunks() {
done
}


if [[ "$STORAGE_SERVICE" == "azure" ]]; then
days=$(azure_list_days)
else
Expand Down
21 changes: 11 additions & 10 deletions helmfile.d/charts/log-manager/files/retention.sh
Original file line number Diff line number Diff line change
Expand Up @@ -35,24 +35,25 @@ s3_list_chunks() {
s3_rm_chunks() {
CHUNK_LIST="$1"

xargs -n1000 s3cmd --config "$S3_CONFIG" rm < "$CHUNK_LIST" > /dev/null
xargs -n1000 s3cmd --config "$S3_CONFIG" rm <"$CHUNK_LIST" >/dev/null
}

# Define Azure Blob functions
azure_list_days() {
az storage blob directory list --container-name "$AZURE_CONTAINER_NAME" --directory-path "$AZURE_PREFIX" --prefix "$AZURE_PREFIX/" --connection-string "$AZURE_STORAGE_CONNECTION_STRING" --output tsv | awk '{print $1}'
az storage fs file list --file-system "$AZURE_CONTAINER_NAME" --path "$AZURE_PREFIX" --connection-string "$AZURE_STORAGE_CONNECTION_STRING" --output tsv | awk '{print $9}' | sed "s#$AZURE_PREFIX/##" | sed 's/\/.*$//' | uniq
}

# List the compressed log chunks (.gz / .zst) stored below one path of the
# Azure file system.
#
# Arguments:
#   $1 - path below $AZURE_PREFIX to list (the main loop passes the day).
# Outputs:
#   One chunk path per line, relative to $AZURE_PREFIX, i.e. "<day>/...".
#   azure_rm_chunks prepends "${AZURE_PREFIX}/" to every line it reads, so the
#   day component has to be kept here; stripping it as well would make the
#   delete target "<prefix>/<rest>" instead of "<prefix>/<day>/<rest>".
azure_list_chunks() {
  AZURE_PATH="$1"

  # NOTE(review): the path is taken from TSV column 9, same as the other
  # listing helpers in this script; positional columns depend on the az CLI
  # output layout - confirm, or select the name field explicitly.
  az storage fs file list \
    --file-system "$AZURE_CONTAINER_NAME" \
    --path "${AZURE_PREFIX}/${AZURE_PATH}/" \
    --connection-string "$AZURE_STORAGE_CONNECTION_STRING" \
    --output tsv |
    grep '\.gz\|\.zst' |
    awk '{print $9}' |
    sed "s#${AZURE_PREFIX}/##"
}

azure_rm_chunks() {
CHUNK_LIST="$1"
while IFS= read -r line; do
az storage blob delete --container-name "$AZURE_CONTAINER_NAME" --name "${AZURE_PREFIX}/${line}" --connection-string "$AZURE_STORAGE_CONNECTION_STRING" > /dev/null
done < "$CHUNK_LIST"
CHUNK_LIST="$1"
while IFS= read -r line; do
az storage blob delete --container-name "$AZURE_CONTAINER_NAME" --name "${AZURE_PREFIX}/${line}" --delete-snapshots "include" --connection-string "$AZURE_STORAGE_CONNECTION_STRING" >/dev/null
done <"$CHUNK_LIST"
}

# Main loop
Expand All @@ -61,7 +62,7 @@ if [[ "$STORAGE_SERVICE" == "azure" ]]; then
if [[ "$DAY" < "$LIMIT" ]]; then
echo "- day: $DAY -----"
echo "----- listing Azure chunks"
azure_list_chunks "$DAY" > "$TMPFILE"
azure_list_chunks "$DAY" >"$TMPFILE"
echo "----- clearing Azure chunks"
azure_rm_chunks "$TMPFILE"
fi
Expand All @@ -71,7 +72,7 @@ else
if [[ "$DAY" < "$LIMIT" ]]; then
echo "- day: $DAY -----"
echo "----- listing S3 chunks"
s3_list_chunks "$DAY" > "$TMPFILE"
s3_list_chunks "$DAY" >"$TMPFILE"
echo "----- clearing S3 chunks"
s3_rm_chunks "$TMPFILE"
fi
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,15 @@ spec:
name: {{ include "log-manager.fullname" $ }}-storage-credentials
key: azure-storage-connection-string
- name: AZURE_CONTAINER_NAME
value: {{ $.Values.azure.containerName }}
value: {{ .bucket }}
- name: AZURE_PREFIX
value: logs
value: {{ .prefix }}
- name: STORAGE_SERVICE
value: azure
- name: AZCOPY_BUFFER_GB
value: {{ $.Values.azure.azureCopyBufferGB | quote }}
- name: AZCOPY_CONCURRENCY_VALUE
value: {{ $.Values.azure.azureCopyConcurrency | quote }}
{{- end }}
- name: COMPACT_DAYS
value: {{ .compaction.days | quote }}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,9 @@ spec:
name: {{ include "log-manager.fullname" $ }}-storage-credentials
key: azure-storage-connection-string
- name: AZURE_CONTAINER_NAME
value: {{ $.Values.azure.containerName }}
value: {{ .bucket }}
- name: AZURE_PREFIX
value: logs
value: {{ .prefix }}
- name: STORAGE_SERVICE
value: azure
{{- end }}
Expand Down
2 changes: 2 additions & 0 deletions helmfile.d/charts/log-manager/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ azure:
storageAccountName: set-me
storageAccountKey: set-me
containerName: set-me
azureCopyBufferGB: 0.3
azureCopyConcurrency: 8


instances:
Expand Down
13 changes: 3 additions & 10 deletions helmfile.d/values/fluentd/aggregator-common.yaml.gotmpl
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ extraDeploy:
image:
registry: ghcr.io
repository: elastisys/fluentd
tag: v5.19.0-ck8s1
tag: v5.19.1-ck8s1
## Specify a imagePullPolicy
## Defaults to 'Always' if image tag is 'latest', else set to 'IfNotPresent'
## ref: https://kubernetes.io/docs/user-guide/images/#pre-pulling-images
Expand Down Expand Up @@ -163,18 +163,11 @@ aggregator:

force_path_style {{ .Values.objectStorage.s3.forcePathStyle }}
{{- else if eq .Values.objectStorage.type "azure" }}
@type azure-storage-append-blob
@type azurestorage

azure_storage_account {{ .Values.objectStorage.azure.storageAccountName }}
azure_storage_access_key {{ .Values.objectStorage.azure.storageAccountKey }}
path logs/${tag}/%Y%m%d/
azure_container_name {{ .Values.objectStorage.buckets.audit }}
auto_create_container true
buffer_type file
buffer_path /var/log/fluent/azureblob/
flush_interval 60s
retry_wait 10s
max_retry 12
auto_create_container false
{{- end }}

audit.output.conf: |
Expand Down
3 changes: 2 additions & 1 deletion helmfile.d/values/fluentd/log-manager.yaml.gotmpl
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@
# Values handed to the log-manager chart when object storage is Azure.
azure:
  storageAccountName: {{ .Values.objectStorage.azure.storageAccountName }}
  storageAccountKey: {{ .Values.objectStorage.azure.storageAccountKey }}
  enabled: true
  # These key names must match what the chart reads (azure.azureCopyBufferGB and
  # azure.azureCopyConcurrency); with any other spelling the configured values
  # are dropped and the chart's built-in defaults are used instead.
  azureCopyBufferGB: {{ .Values.fluentd.logManager.compaction.azureCopyBufferGB }}
  azureCopyConcurrency: {{ .Values.fluentd.logManager.compaction.azureCopyConcurrency }}
{{- else if eq .Values.objectStorage.type "s3"}}
s3:
forcePathStyle: {{ .Values.objectStorage.s3.forcePathStyle }}
Expand Down

0 comments on commit e5d2b97

Please sign in to comment.