Add more test cases

jsoriano · jsoriano · commit 3a9d6072ec5f · 2025-09-11T18:17:28.000+02:00
diff --git a/test/packages/other/tags_without_assets/data_stream/foo/agent/stream/filestream.yml.hbs b/test/packages/other/tags_without_assets/data_stream/foo/agent/stream/filestream.yml.hbs
@@ -0,0 +1,44 @@
+paths:
+{{#each paths as |path|}}
+  - {{path}}
+{{/each}}
+{{#if exclude_files}}
+prospector.scanner.exclude_files:
+{{#each exclude_files as |pattern f|}}
+  - {{pattern}}
+{{/each}}
+{{/if}}
+{{#if multiline_json}}
+multiline.pattern: '^{'
+multiline.negate: true
+multiline.match: after
+multiline.max_lines: 5000
+multiline.timeout: 10
+{{/if}}
+{{#if custom}}
+{{custom}}
+{{/if}}
+
+{{#if tags.length}}
+tags:
+{{#each tags as |tag|}}
+- {{tag}}
+{{/each}}
+{{#if preserve_original_event}}
+- preserve_original_event
+{{/if}}
+{{else}}
+{{#if preserve_original_event}}
+tags:
+- preserve_original_event
+{{/if}}
+{{/if}}
+
+{{#contains "forwarded" tags}}
+publisher_pipeline.disable_host: true
+{{/contains}}
+
+{{#if processors}}
+processors:
+{{processors}}
+{{/if}}
diff --git a/test/packages/other/tags_without_assets/data_stream/foo/elasticsearch/ingest_pipeline/default.yml b/test/packages/other/tags_without_assets/data_stream/foo/elasticsearch/ingest_pipeline/default.yml
@@ -0,0 +1,10 @@
+---
+description: Pipeline for processing sample logs
+processors:
+- set:
+    field: sample_field
+    value: "1"
+on_failure:
+- set:
+    field: error.message
+    value: '{{ _ingest.on_failure_message }}'
diff --git a/test/packages/other/tags_without_assets/data_stream/foo/fields/base-fields.yml b/test/packages/other/tags_without_assets/data_stream/foo/fields/base-fields.yml
@@ -0,0 +1,12 @@
+- name: data_stream.type
+  type: constant_keyword
+  description: Data stream type.
+- name: data_stream.dataset
+  type: constant_keyword
+  description: Data stream dataset.
+- name: data_stream.namespace
+  type: constant_keyword
+  description: Data stream namespace.
+- name: '@timestamp'
+  type: date
+  description: Event timestamp.
diff --git a/test/packages/other/tags_without_assets/data_stream/foo/manifest.yml b/test/packages/other/tags_without_assets/data_stream/foo/manifest.yml
@@ -0,0 +1,252 @@
+title: "Just an empty data stream"
+type: logs
+streams:
+  - input: filestream
+    title: "logs via filestream"
+    description: |-
+      Collect logs with filestream
+    template_path: filestream.yml.hbs
+    vars:
+      - name: paths
+        type: text
+        title: "Paths"
+        multi: true
+        required: true
+        show_user: true
+        default:
+          - /var/log/*.log
+      - name: data_stream.dataset
+        type: text
+        title: "Dataset name"
+        description: |-
+          Dataset to write data to. Changing the dataset will send the data to a different index. You can't use `-` in the name of a dataset and only valid characters for [Elasticsearch index names](https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-index_.html).
+        required: true
+        show_user: true
+        default: filestream.generic
+      - name: pipeline
+        type: text
+        title: "Ingest Pipeline"
+        description: |-
+          The Ingest Node pipeline ID to be used by the integration.
+        show_user: true
+      - name: parsers
+        type: yaml
+        title: "Parsers"
+        description: |-
+          This option expects a list of parsers that the log line has to go through. For more information see [Parsers](https://www.elastic.co/guide/en/beats/filebeat/current/filebeat-input-filestream.html#_parsers)
+        show_user: true
+        default: ""
+        #- ndjson:
+        #    target: ""
+        #    message_key: msg
+        #- multiline:
+        #    type: count
+        #    count_lines: 3
+      - name: exclude_files
+        type: text
+        title: "Exclude Files"
+        description: |-
+          A list of regular expressions to match the files that you want Elastic Agent to ignore. By default no files are excluded.
+        multi: true
+        show_user: true
+        default:
+          - \.gz$
+      - name: include_files
+        type: text
+        title: "Include Files"
+        description: |-
+          A list of regular expressions to match the files that you want Elastic Agent to include. If a list of regexes is provided, only the files that are allowed by the patterns are harvested.
+        multi: true
+        show_user: true
+      - name: processors
+        type: yaml
+        title: "Processors"
+        description: |-
+          Processors are used to reduce the number of fields in the exported event or to enhance the event with metadata. This executes in the agent before the logs are parsed. See [Processors](https://www.elastic.co/guide/en/beats/filebeat/current/filtering-and-enhancing-data.html) for details.
+      - name: tags
+        type: text
+        title: "Tags"
+        description: |-
+          Tags to include in the published event
+        multi: true
+        show_user: true
+      - name: encoding
+        type: text
+        title: "Encoding"
+        description: |-
+          The file encoding to use for reading data that contains international characters. For a full list of valid encodings, see the [Documentation](https://www.elastic.co/guide/en/beats/filebeat/current/filebeat-input-filestream.html#_encoding_2)
+      - name: recursive_glob
+        type: bool
+        title: "Recursive Glob"
+        description: |-
+          Enable expanding `**` into recursive glob patterns. With this feature enabled, the rightmost `**` in each path is expanded into a fixed number of glob patterns. For example: `/foo/**` expands to `/foo`, `/foo/*`, `/foo/*/*`, and so on. If enabled it expands a single `**` into a 8-level deep `*` pattern.
+          This feature is enabled by default. Set prospector.scanner.recursive_glob to false to disable it.
+        default: true
+      - name: symlinks
+        type: bool
+        title: "Enable symlinks"
+        description: |-
+          The symlinks option allows Elastic Agent to harvest symlinks in addition to regular files. When harvesting symlinks, Elastic Agent opens and reads the original file even though it reports the path of the symlink.
+          **Because this option may lead to data loss, it is disabled by default.**
+      - name: resend_on_touch
+        type: bool
+        title: "Resend on touch"
+        description: |-
+          If this option is enabled a file is resent if its size has not changed but its modification time has changed to a later time than before. It is disabled by default to avoid accidentally resending files.
+      - name: check_interval
+        type: text
+        title: "Check Interval"
+        description: |-
+          How often Elastic Agent checks for new files in the paths that are specified for harvesting. For example Specify 1s to scan the directory as frequently as possible without causing Elastic Agent to scan too frequently. **We do not recommend to set this value <1s.**
+      - name: ignore_older
+        type: text
+        title: "Ignore Older"
+        description: |-
+          If this option is enabled, Elastic Agent ignores any files that were modified before the specified timespan. You can use time strings like 2h (2 hours) and 5m (5 minutes). The default is 0, which disables the setting.
+          You must set Ignore Older to be greater than On State Change Inactive.
+          For more information, please see the [Documentation](https://www.elastic.co/guide/en/beats/filebeat/current/filebeat-input-filestream.html#filebeat-input-filestream-ignore-older)
+      - name: ignore_inactive
+        type: text
+        title: "Ignore Inactive"
+        description: |-
+          If this option is enabled, Elastic Agent ignores every file that has not been updated since the selected time. Possible options are since_first_start and since_last_start.
+      - name: close_on_state_changed_inactive
+        type: text
+        title: "Close on State Changed Inactive"
+        description: |-
+          When this option is enabled, Elastic Agent closes the file handle if a file has not been harvested for the specified duration. The counter for the defined period starts when the last log line was read by the harvester. It is not based on the modification time of the file. If the closed file changes again, a new harvester is started and the latest changes will be picked up after Check Interval has elapsed.
+      - name: close_on_state_changed_renamed
+        type: bool
+        title: "Close on State Changed Renamed"
+        description: |-
+          **Only use this option if you understand that data loss is a potential side effect.**
+          When this option is enabled, Elastic Agent closes the file handler when a file is renamed. This happens, for example, when rotating files. By default, the harvester stays open and keeps reading the file because the file handler does not depend on the file name.
+      - name: close_on_state_changed_removed
+        type: bool
+        title: "Close on State Changed Removed"
+        description: |-
+          When this option is enabled, Elastic Agent closes the harvester when a file is removed. Normally a file should only be removed after it’s inactive for the duration specified by close.on_state_change.inactive.
+      - name: close_reader_eof
+        type: bool
+        title: "Close Reader EOF"
+        description: |-
+          **Only use this option if you understand that data loss is a potential side effect.**
+          When this option is enabled, Elastic Agent closes a file as soon as the end of a file is reached. This is useful when your files are only written once and not updated from time to time. For example, this happens when you are writing every single log event to a new file. This option is disabled by default.
+      - name: close_reader_after_interval
+        type: text
+        title: "Close Reader After Interval"
+        description: |-
+          **Only use this option if you understand that data loss is a potential side effect. Another side effect is that multiline events might not be completely sent before the timeout expires.**
+          This option is particularly useful in case the output is blocked, which makes Elastic Agent keep open file handlers even for files that were deleted from the disk.
+          For more information see the [documentation](https://www.elastic.co/guide/en/beats/filebeat/current/filebeat-input-filestream.html#filebeat-input-filestream-close-timeout).
+      - name: clean_inactive
+        type: text
+        title: "Clean Inactive"
+        description: |-
+          **Only use this option if you understand that data loss is a potential side effect.**
+          When this option is enabled, Elastic Agent removes the state of a file after the specified period of inactivity has elapsed.
+          E.g: "30m", Valid time units are "ns", "us" (or "µs"), "ms", "s", "m", "h". By default cleaning inactive states is disabled, -1 is used to disable it.
+        default: -1
+      - name: clean_removed
+        type: bool
+        title: "Clean Removed"
+        description: |-
+          When this option is enabled, Elastic Agent cleans files from the registry if they cannot be found on disk anymore under the last known name.
+          **You must disable this option if you also disable Close Removed.**
+      - name: harvester_limit
+        type: integer
+        title: "Harvester Limit"
+        description: |-
+          The harvester_limit option limits the number of harvesters
+          that are started in parallel for one input. This directly
+          relates to the maximum number of file handlers that are
+          opened. The default is 0 (no limit).
+        default: 0
+      - name: backoff_init
+        type: text
+        title: "Backoff Init"
+        description: |-
+          The backoff option defines how long Elastic Agent waits before checking a file again after EOF is reached. The default is 1s.
+      - name: backoff_max
+        type: text
+        title: "Backoff Max"
+        description: |-
+          The maximum time for Elastic Agent to wait before checking a file again after EOF is reached. The default is 10s.
+          **Requirement: Set Backoff Max to be greater than or equal to Backoff Init and less than or equal to Check Interval (Backoff Init <= Backoff Max <= Check Interval).**
+      - name: fingerprint
+        type: bool
+        title: "File identity: Fingerprint"
+        description: |-
+          **Changing file_identity methods between runs may result in
+          duplicated events in the output.**
+          Uses a fingerprint generated from the first few bytes (1k is
+          the default, this can be configured via Fingerprint offset
+          and length) to identify a file instead inode + device ID.
+          Refer to https://www.elastic.co/guide/en/beats/filebeat/current/filebeat-input-filestream.html#_file_identity_2
+          for more details. If this option is disabled (and 'Native
+          file identity is not enabled'), Elastic-Agent < 9.0.0 will
+          use Native as the file identity, and >= 9.0.0 will use
+          Fingerprint with the default offset and length.
+        default: true
+      - name: fingerprint_offset
+        type: integer
+        title: "File identity: Fingerprint offset"
+        description: |-
+          Offset from the beginning of the file to start calculating
+          the fingerprint. The default is 0. Only used when the
+          fingerprint file identity is selected
+        default: 0
+      - name: fingerprint_length
+        type: integer
+        title: "File identity: Fingerprint length"
+        description: |-
+          The number of bytes used to calculate the fingerprint. The
+          default is 1024. Only used when the fingerprint file
+          identity is selected.
+        default: 1024
+      - name: file_identity_native
+        type: bool
+        title: "File identity: Native"
+        description: |-
+          **Changing file_identity methods between runs may result in
+          duplicated events in the output.**
+          Uses a native identifier for files, on most Unix-like
+          file systems this is the inode + device ID. On file systems
+          that do not support inode, the native equivalent is used.
+          If you enable this option you **MUST disable Fingerprint
+          file identity**. Refer to
+          https://www.elastic.co/docs/reference/beats/filebeat/filebeat-input-filestream
+          for more details.
+        default: false
+      - name: rotation_external_strategy_copytruncate
+        type: yaml
+        title: "Rotation Strategy"
+        description: "If the log rotating application copies the contents of the active file and then truncates the original file, use these options to help Elastic Agent to read files correctly.\nSet the option suffix_regex so Elastic Agent can tell active and rotated files apart. \nThere are two supported suffix types in the input: numberic and date."
+      - name: exclude_lines
+        type: text
+        title: "Exclude Lines"
+        description: |-
+          A list of regular expressions to match the lines that you want Elastic Agent to exclude. Elastic Agent drops any lines that match a regular expression in the list. By default, no lines are dropped. Empty lines are ignored.
+        multi: true
+      - name: include_lines
+        type: text
+        title: "Include Lines"
+        description: |-
+          A list of regular expressions to match the lines that you want Elastic Agent to include. Elastic Agent exports only the lines that match a regular expression in the list. By default, all lines are exported. Empty lines are ignored.
+        multi: true
+      - name: buffer_size
+        type: text
+        title: "Buffer Size"
+        description: |-
+          The size in bytes of the buffer that each harvester uses when fetching a file. The default is 16384.
+      - name: message_max_bytes
+        type: text
+        title: "Message Max Bytes"
+        description: |-
+          The maximum number of bytes that a single log message can have. All bytes after mesage_max_bytes are discarded and not sent. The default is 10MB (10485760).
+      - name: condition
+        type: text
+        title: "Condition"
+        description: |-
+          Condition to filter when to collect this input. See [Dynamic Input Configuration](https://www.elastic.co/guide/en/fleet/current/dynamic-input-configuration.html) for details.
+        show_user: true