---
name: Crawl Data

on:
  # Manual trigger from the GitHub Actions UI.
  # See https://docs.github.com/en/actions/reference/events-that-trigger-workflows#workflow_dispatch
  workflow_dispatch:
    inputs:
      data:
        # Input file for the crawl. Either the name of a file under the data/
        # folder of this repo, or a full, publicly downloadable link to an XLSX
        # file in the same format as the files under data/.
        default: 'sample_data.xlsx'
        required: false
        description: " The data link which contains list of licensee/licensor info. (e.g sample_data.xlsx) The link should be a public link which can be downloaded by wget command. **** ONLY XLSX FILE IS SUPPORTED ! **** "

      send_data_on_completion:
        # Disabled: all files retrieved from the Orbis batch result are delivered
        # together as one ZIP file; if the run fails, already downloaded files
        # are lost.
        # Enabled: each file is sent to the Slack channel as soon as it has been
        # downloaded from Orbis, without waiting for the whole process to finish.
        type: boolean
        default: false
        required: false
        description: "To receive data on search completion, check this option"

      activate_parallel_exec:
        # Enabled: the program runs in parallel mode, i.e. it spins up multiple
        # Chrome instances, uploads the licensee/licensor files to Orbis and
        # fetches the results in parallel.
        # Not advised during working hours, because Orbis allows at most
        # 5 simultaneous user sessions.
        type: boolean
        default: false
        required: false
        description: "Enable to run the program in parallel. [ NOT ADVICED DURING WORKING HOURS - SINCE # OF MAX SIMULTANEOUS USER ACCESS IS 5 ]"

      notify:
        # Enabled: a Slack notification is sent when the program starts, fails
        # or completes.
        type: boolean
        default: false
        required: false
        description: "To receive notifications and all search result files in one zip file on Slack channel, check this option"

      check_on_sec:
        # Enabled: the program also checks the SEC.GOV website for additional
        # data to aggregate.
        # Not advised at the time of writing: unlike the SEC API (stable,
        # structured data), the SEC.GOV website is unstable and unstructured, so
        # a variety of HTML layouts has to be parsed.
        # The option is kept so that the SEC.GOV website can still be
        # experimented with. If unsure, leave it unchecked.
        type: boolean
        default: false
        required: false
        description: "SEC.GOV website will be checked for additional data to aggregate. Check this option to enable it."

  # Lets the workflow be triggered from the Slack channel.
  repository_dispatch:
    types: crawl-data
env:
  # ---------------------------------------------------------------------------
  # Orbis credentials and endpoints.
  # Confidential values live in GitHub Secrets:
  # https://docs.github.com/en/actions/reference/encrypted-secrets
  # ---------------------------------------------------------------------------
  ORBIS_EMAIL_ADDRESS: ${{ secrets.ORBIS_EMAIL_ADDRESS }}
  ORBIS_PASSWORD: ${{ secrets.ORBIS_PASSWORD }}
  ORBIS_ACCESS_URL: ${{ secrets.ORBIS_ACCESS_URL }}
  ORBIS_BATCH_SEARCH_URL: ${{ secrets.ORBIS_BATCH_SEARCH_URL }}
  ORBIS_LOGOUT_URL: ${{ secrets.ORBIS_LOGOUT_URL }}

  # ---------------------------------------------------------------------------
  # Data and log directories on the runner.
  # The active values are for the self-hosted runner:
  # https://docs.github.com/en/actions/hosting-your-own-runners/about-self-hosted-runners
  # ---------------------------------------------------------------------------
  DATA_DIR: '/home/thinkpad/Desktop/github-self-hosted/actions-runner/_study/orbi/orbi/data/'
  LOG_DIR: '/home/thinkpad/Desktop/github-self-hosted/actions-runner/_study/orbi/orbi/logs/'
  # When using GitHub-hosted runners instead, replace the two values above with
  # the ones in the block below:
  # https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners
  # -------------------------------------------------------
  # DATA_DIR: /home/runner/work/orbi/orbi/data/
  # LOG_DIR: /home/runner/work/orbi/orbi/logs/
  # -------------------------------------------------------

  # NOTE(review): unquoted `False` may be read as a YAML boolean rather than the
  # string "False" - confirm which form the program expects before quoting it.
  LOCAL_DEV: False
  DEFAULT_DATA_SOURCE: 'sample_data.xlsx'

  # Number of attempts given to the retry action that runs the Orbis search
  # (with the current value of 1 the script is run once, without retries).
  MAX_RUN_ATTEMPTS: 1
  # Timeout, in minutes, for one run of the Orbis search (360 min = 6 hours).
  RUN_TIMEOUT_IN_MN: 360

  # ---------------------------------------------------------------------------
  # Optional upload of the result to an AWS S3 bucket.
  # Uncomment the variables below to enable it; for setting secrets see
  # https://docs.github.com/en/actions/reference/encrypted-secrets
  # ---------------------------------------------------------------------------
  # AWS_S3_BUCKET: ${{ secrets.AWS_S3_BUCKET }}
  # AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
  # AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
  # SOURCE_DIR: /home/runner/work/orbi/orbi/
  # -------------------------------------------------------

  # ---------------------------------------------------------------------------
  # Slack notifications and result upload.
  # Webhook URL: https://api.slack.com/messaging/webhooks
  # Token:       https://api.slack.com/authentication/token-types#granular_bot
  # Channel:     https://slack.com/intl/en-tr/help/articles/201402297-Create-a-channel
  # ---------------------------------------------------------------------------
  # SLACK_CHANNEL must hold the channel ID, not the channel name.
  SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
  SLACK_USERNAME: 'orbi'
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK_URL }}
  SLACK_FOOTER: "🤓 automated bot Orbi"
  SLACK_TOKEN: ${{ secrets.SLACK_TOKEN }}

  # Retention period, in days, of the data uploaded as GitHub artifacts at the
  # end of the run.
  # See https://docs.github.com/en/actions/guides/storing-workflow-data-as-artifacts
  RETENTION_PERIOD: 20

  # Value of the check_on_sec workflow input.
  CHECK_ON_SEC: ${{ github.event.inputs.check_on_sec }}
# Permissions granted to the workflow's GITHUB_TOKEN: read-only access to the
# repository contents.
permissions:
  contents: read
jobs:
  search_on_sec_gov:
    # Fetches data from the SEC API using orbi/crawl.py from this repository.
    # Runs on the self-hosted runner; DATA_DIR points at a directory on it.
    runs-on: self-hosted
    steps:
      - uses: actions/checkout@v3

      - name: Set up Python 3.10
        uses: actions/setup-python@v3
        with:
          python-version: "3.10"

      # Install the dependencies of the project.
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt

      # Timestamp in DD_MM_YYYY format, used in archive and artifact names.
      - name: Get timestamp
        id: get-timestamp
        run: |
          echo "timestamp=$(date +'%d_%m_%Y')" >> "$GITHUB_OUTPUT"

      # Derives the input file name from the `data` workflow input and exposes
      # it to the following steps through the GITHUB_OUTPUT file.
      # See https://docs.github.com/en/actions/reference/workflow-commands-for-github-actions#setting-an-output-parameter
      #
      # The step runs unconditionally: every later step needs `file_name`, also
      # when the default data source is used. When no input is available (for
      # example on repository_dispatch) DEFAULT_DATA_SOURCE is used instead.
      # The input is passed through an environment variable rather than being
      # interpolated into the script, so its content cannot be executed as
      # shell code.
      - name: Get input file name
        id: get-file-name
        env:
          INPUT_DATA: ${{ github.event.inputs.data || env.DEFAULT_DATA_SOURCE }}
        run: |
          echo "file_name=$(basename "$INPUT_DATA")" >> "$GITHUB_OUTPUT"

      # Crawl data from the SEC API for the licensee side.
      - name: (LICENSEE) Crawl data from SEC.GOV
        env:
          SOURCE_FILE: ${{ env.DATA_DIR }}${{ steps.get-file-name.outputs.file_name }}
        run: |
          python orbi/crawl.py --source_file "$SOURCE_FILE" --licensee

      # Crawl data from the SEC API for the licensor side.
      - name: (LICENSOR) Crawl data from SEC.GOV
        env:
          SOURCE_FILE: ${{ env.DATA_DIR }}${{ steps.get-file-name.outputs.file_name }}
        run: |
          python orbi/crawl.py --source_file "$SOURCE_FILE" --no-licensee

      # Zip the content of the data directory, excluding the XLSX files.
      - name: ZIP JSON and CSV files
        run: |
          zip -j -r ${{ env.DATA_DIR }}sec-api-data-${{ steps.get-timestamp.outputs.timestamp }}.zip ${{ env.DATA_DIR }} -x '*.xlsx'

      # Send the zipped data to the Slack channel; a failure here does not stop
      # the job.
      - name: Send LICENSEE/LICENSOR files to Slack
        run: |
          python utils/send_to_slack.py --file_path ${{ env.DATA_DIR }}sec-api-data-${{ steps.get-timestamp.outputs.timestamp }}.zip --slack_channel orbis-data --message "LICENSEE/LICENSOR files are created from SEC API"
        continue-on-error: true

      # THIS PART NEEDS TO BE UPDATED WITH AN ACTIVE AZURE ACCOUNT CREDENTIALS AND CONTAINER NAME
      # General usage from local: python utils/upload_to_azure.py <file/dir> <container> <blob>
      # Refer Azure Storage Account: https://docs.microsoft.com/en-us/azure/storage/blobs/storage-quickstart-blobs-python
      # Uncomment the part inside the dashed lines and set the environment
      # variables to upload data to Azure Blob.
      # -------------------------------------------------------
      # - name: Upload to Azure Blob
      #   run: |
      #     pip install azure-storage-blob
      #     python utils/upload_to_azure.py --dir_file_path /home/runner/work/orbi/orbi/data --container_name ${{ env.AZURE_CONTAINER_NAME }} --blob_name orbis-data-${{ steps.get-timestamp.outputs.timestamp }}
      #   env:
      #     ACCOUNT_NAME: ${{ secrets.AZURE_STORAGE_ACCOUNT_NAME }}
      #     ACCOUNT_KEY: ${{ secrets.AZURE_STORAGE_ACCOUNT_KEY }}
      #     RETENTION_DAYS: 30
      #     AZURE_CONTAINER_NAME: sec-gov-data
      #     SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
      #   continue-on-error: true
      # -------------------------------------------------------

      # Upload the data directory as a GitHub artifact, kept for
      # RETENTION_PERIOD days.
      # See https://docs.github.com/en/actions/guides/storing-workflow-data-as-artifacts
      # Artifact limits: https://docs.github.com/en/actions/reference/usage-limits-billing-and-administration#usage-limits
      - name: Upload data to artifact
        uses: actions/upload-artifact@v3
        with:
          name: sec-gov-data-${{ steps.get-timestamp.outputs.timestamp }}.zip
          path: ${{ env.DATA_DIR }}
          retention-days: ${{ env.RETENTION_PERIOD }}
        continue-on-error: true
# -----------------------RUN ON ORBIS JOB-------------------------------------------
  search_on_orbis:
    # Runs the Orbis batch search on the self-hosted runner. According to the
    # original notes the runner provides Python 3.10 on Ubuntu 20.04 and is used
    # to avoid the time limits of GitHub-hosted runners:
    # https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration
    # The `needs` line below is commented out, so this job does NOT wait for
    # search_on_sec_gov; uncomment it to run the two jobs one after the other.
    runs-on: self-hosted
    # needs: search_on_sec_gov
    steps:
      - uses: actions/checkout@v3

      - name: Set up Python 3.10
        uses: actions/setup-python@v3
        with:
          python-version: "3.10"

      # ************************************************************************
      # PRECHECKS
      # ************************************************************************
      # Decide whether the `data` input refers to a local file or to a URL.
      # The input is read from an environment variable and quoted, so an empty
      # value or shell metacharacters cannot break or alter the script. When no
      # input is available DEFAULT_DATA_SOURCE is used.
      - name: Check input file name
        id: check_input_file_name
        env:
          INPUT_DATA: ${{ github.event.inputs.data || env.DEFAULT_DATA_SOURCE }}
        run: |
          if [[ "$INPUT_DATA" == http* ]]; then
            echo "is_url=true" >> "$GITHUB_OUTPUT"
          else
            echo "is_url=false" >> "$GITHUB_OUTPUT"
          fi

      # Expose the bare file name of the input to the following steps. Runs
      # unconditionally because every later step needs `file_name`, also when
      # the default data source is used.
      - name: Get input file name
        id: get-file-name
        env:
          INPUT_DATA: ${{ github.event.inputs.data || env.DEFAULT_DATA_SOURCE }}
        run: |
          echo "file_name=$(basename "$INPUT_DATA")" >> "$GITHUB_OUTPUT"

      # If the input is a URL, download the file and move it to the data
      # directory.
      - name: Download data if provided
        if: ${{ steps.check_input_file_name.outputs.is_url == 'true' }}
        env:
          INPUT_DATA: ${{ github.event.inputs.data }}
          FILE_NAME: ${{ steps.get-file-name.outputs.file_name }}
        run: |
          wget "$INPUT_DATA"
          mv "$FILE_NAME" "${{ env.DATA_DIR }}"

      # Fail early when the input file is not in the data directory. This is a
      # single check placed after the optional download, so it covers local
      # files as well as downloaded ones.
      - name: Check file existence
        env:
          FILE_NAME: ${{ steps.get-file-name.outputs.file_name }}
        run: |
          if [ ! -f "${{ env.DATA_DIR }}${FILE_NAME}" ]; then
            echo "File ${FILE_NAME} does not exist in ${{ env.DATA_DIR }}. Exiting..."
            exit 1
          fi

      # Timestamp in DD_MM_YYYY format, used in archive and artifact names.
      - name: Get timestamp
        id: get-timestamp
        run: |
          echo "timestamp=$(date +'%d_%m_%Y')" >> "$GITHUB_OUTPUT"

      # ************************************************************************
      # INSTALL DEPENDENCIES
      # ************************************************************************
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          if [ -f requirements.txt ]; then pip install -r requirements.txt; fi

      # Notify on Slack that the job has started, if the notify input is true.
      - name: Send Job Started Notification to Slack
        if: ${{ github.event.inputs.notify == 'true' }}
        uses: rtCamp/action-slack-notify@v2.2.0
        env:
          SLACK_COLOR: ${{ job.status }}  # or a specific color like 'good' or '#ff00ff'
          SLACK_MESSAGE: ':bell: Orbi is started crawl data from Orbis database on Github using the file ${{ github.event.inputs.data }} as input data.'
          SLACK_TITLE: ':bell: Orbi is working on crawling data from Orbis database on Github'

      # ************************************************************************
      # RUN Orbi for batch search on Orbis
      # ************************************************************************
      # The retry action re-runs the script on failure, up to MAX_RUN_ATTEMPTS
      # attempts, each limited to RUN_TIMEOUT_IN_MN minutes.
      - name: Execute the script
        uses: nick-fields/retry@v2
        id: batch_search
        with:
          timeout_minutes: ${{ env.RUN_TIMEOUT_IN_MN }}
          max_attempts: ${{ env.MAX_RUN_ATTEMPTS }}
          command: |
            python orbi/orbi.py
        env:
          # Environment variables passed to the script.
          DATA_SOURCE: ${{ steps.get-file-name.outputs.file_name }}
          SEND_DATA_ON_COMPLETION: ${{ github.event.inputs.send_data_on_completion }}
          CHECK_ON_SEC: ${{ github.event.inputs.check_on_sec }}
          PARALLEL_EXECUTION: ${{ github.event.inputs.activate_parallel_exec }}
          SLACK_CHANNEL: ${{ env.SLACK_CHANNEL }}

      # ************************************************************************
      # POST PROCESSING STEPS
      # ************************************************************************
      # Notify on Slack that the batch search has failed, if the notify input
      # is true.
      - name: Send Job Failed Notification to Slack
        if: ${{ failure() && steps.batch_search.conclusion == 'failure' && github.event.inputs.notify == 'true' }}
        uses: rtCamp/action-slack-notify@v2.2.0
        env:
          SLACK_COLOR: ${{ job.status }}  # or a specific color like 'good' or '#ff00ff'
          SLACK_MESSAGE: ':x: Failed to run batch search, please check on Github. '
          SLACK_TITLE: ':x: Batch search failed at some point, in most cases this is caused due to slowness in the network (since it causes slow rendering of the page and leads to have no element we are looking for) !'

      # ENABLE THIS STEP IF YOU WANT TO UPLOAD THE DATA TO AZURE BLOB
      # SET YOUR OWN CREDENTIALS IN THE SECRETS OF THE REPO
      # Example usage locally: python utils/upload_to_azure.py <file/dir> <container> <blob>
      #
      # -------------------------------------------------------
      # - name: Upload to Azure Blob
      #   run: |
      #     pip install azure-storage-blob
      #     python utils/upload_to_azure.py --dir_file_path ${{ env.DATA_DIR }} --container_name ${{ env.AZURE_CONTAINER_NAME }} --blob_name orbis-data-${{ steps.get-timestamp.outputs.timestamp }}
      #   env:
      #     ACCOUNT_NAME: ${{ secrets.AZURE_STORAGE_ACCOUNT_NAME }}
      #     ACCOUNT_KEY: ${{ secrets.AZURE_STORAGE_ACCOUNT_KEY }}
      #     RETENTION_DAYS: 7
      #     AZURE_CONTAINER_NAME: ${{ secrets.AZURE_CONTAINER_NAME }}
      #   continue-on-error: true
      # -------------------------------------------------------

      # Zip the data directory; the -x patterns name the sample files and the
      # input file as exclusions.
      - name: Zip data
        run: |
          zip -j -r orbis-data-${{ steps.get-timestamp.outputs.timestamp }}.zip '${{ env.DATA_DIR }}' -x '${{ env.DATA_DIR }}sample_data.xlsx' -x '${{ env.DATA_DIR }}sample_data_big.xlsx' -x '${{ env.DATA_DIR }}${{ steps.get-file-name.outputs.file_name }}'

      # If the notify input is true, send the zipped data to the Slack channel.
      - name: Send data to slack channel
        if: ${{ success() && github.event.inputs.notify == 'true' }}
        run: |
          python utils/send_to_slack.py --file_path orbis-data-${{ steps.get-timestamp.outputs.timestamp }}.zip --slack_channel ${{ env.SLACK_CHANNEL }} --message ' Prepared data from Orbis database search is done !. orbis-data-${{ steps.get-timestamp.outputs.timestamp }}.zip is attached.'
        continue-on-error: true

      # Send the zipped data as a document to the Telegram chat.
      # Set your own credentials in the secrets of the repo.
      - name: Send data to Telegram
        run: |
          curl -F chat_id=${{ secrets.TELEGRAM_CHAT_ID }} -F document=@orbis-data-${{ steps.get-timestamp.outputs.timestamp }}.zip https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendDocument
        continue-on-error: true

      # Upload the data directory as a GitHub artifact, without the sample
      # files. See https://github.com/actions/upload-artifact
      # Exclusion patterns are written without surrounding quotes: inside a
      # block scalar quotes would be part of the pattern and it would never
      # match.
      - name: Upload data to artifact
        uses: actions/upload-artifact@v3
        with:
          name: orbis-data-${{ steps.get-timestamp.outputs.timestamp }}.zip
          path: |
            ${{ env.DATA_DIR }}
            !${{ env.DATA_DIR }}sample_data.xlsx
            !${{ env.DATA_DIR }}sample_data_big.xlsx
            !${{ env.DATA_DIR }}sample_data_big_v2.xlsx
          retention-days: ${{ env.RETENTION_PERIOD }}
        continue-on-error: true

      # Notify on Slack that the job has completed successfully, if the notify
      # input is true. The link is built from the run context, so it points to
      # this run in whichever repository the workflow is executed.
      - name: Send Job Success Notification to Slack
        if: ${{ success() && github.event.inputs.notify == 'true' }}
        uses: rtCamp/action-slack-notify@v2.2.0
        env:
          SLACK_COLOR: ${{ job.status }}  # or a specific color like 'good' or '#ff00ff'
          SLACK_MESSAGE: ':rocket: Data is available to download from artifacts: link to artifacts: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}'
          # SLACK_MESSAGE: ':rocket: Operation is completed data link: https://${{ secrets.AWS_S3_BUCKET }}.s3.${{ secrets.AWS_REGION }}.amazonaws.com/orbis-data-${{ steps.get-timestamp.outputs.timestamp }}/ For more information check README page of the project: https://github.com/mrtrkmn/orbi'
          SLACK_TITLE: Data is ready to download append file name to the link !
          SLACK_USERNAME: orbi
          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK_URL }}
          SLACK_FOOTER: '🤓 automated bot Orbi'
# -----------------------AGGREGATE DATA JOB-------------------------------------------
  aggregate_data:
    # Merges the Orbis and SEC results with utils/merge.py once both search
    # jobs have finished, then publishes the merged files.
    needs: [search_on_sec_gov, search_on_orbis]
    runs-on: self-hosted
    steps:
      - uses: actions/checkout@v3

      - name: Set up Python 3.10
        uses: actions/setup-python@v3
        with:
          python-version: "3.10"

      # Install the dependencies of the project.
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt

      # Timestamp in DD_MM_YYYY format, used to locate artifacts and files.
      # NOTE(review): the timestamp is recomputed in this job; if the run
      # crosses midnight it will differ from the one used by the search jobs.
      - name: Get timestamp
        id: get-timestamp
        run: |
          echo "timestamp=$(date +'%d_%m_%Y')" >> "$GITHUB_OUTPUT"

      - name: Download orbis data from artifact
        uses: actions/download-artifact@v3
        with:
          name: orbis-data-${{ steps.get-timestamp.outputs.timestamp }}.zip
          path: ${{ env.DATA_DIR }}
        continue-on-error: true

      - name: Download SEC API data from artifact
        uses: actions/download-artifact@v3
        with:
          name: sec-gov-data-${{ steps.get-timestamp.outputs.timestamp }}.zip
          path: ${{ env.DATA_DIR }}
        continue-on-error: true

      # Extract the archives found in the data directory. The SEC job names its
      # archive sec-api-data-<timestamp>.zip (only the artifact is called
      # sec-gov-data). Each archive is handled on its own so that a missing one
      # does not prevent the other from being extracted; -o overwrites existing
      # files instead of prompting.
      # NOTE(review): the Orbis job creates its zip outside DATA_DIR, so the
      # orbis-data archive is probably absent here - confirm.
      - name: Unzip data
        env:
          TIMESTAMP: ${{ steps.get-timestamp.outputs.timestamp }}
        run: |
          for prefix in orbis-data sec-api-data; do
            archive="${{ env.DATA_DIR }}${prefix}-${TIMESTAMP}.zip"
            if [ -f "$archive" ]; then
              unzip -o "$archive" -d "${{ env.DATA_DIR }}"
            else
              echo "Archive $archive not found, skipping."
            fi
          done
        continue-on-error: true

      # Expose the bare file name of the `data` workflow input (the workflow
      # has no `input_file` input). When no input is available
      # DEFAULT_DATA_SOURCE is used. The value is read from an environment
      # variable so its content cannot be executed as shell code.
      - name: Get event input file name
        id: input-data-file-name
        env:
          INPUT_DATA: ${{ github.event.inputs.data || env.DEFAULT_DATA_SOURCE }}
        run: |
          echo "file_name=$(basename "$INPUT_DATA")" >> "$GITHUB_OUTPUT"

      # Merge the Orbis and SEC outputs, once for licensee and once for
      # licensor.
      - name: Aggregate data
        run: |
          python utils/merge.py --orbis_output_file "${{ env.DATA_DIR }}orbis_data_licensee_${{ steps.get-timestamp.outputs.timestamp }}.xlsx" --sec_output_file "${{ env.DATA_DIR }}company_facts_${{ steps.get-timestamp.outputs.timestamp }}_licensee.csv" --merged_output_file "${{ env.DATA_DIR }}merged_data_${{ steps.get-timestamp.outputs.timestamp }}_licensee.xlsx" --searched_raw_input_file "${{ env.DATA_DIR }}${{ steps.input-data-file-name.outputs.file_name }}"
          python utils/merge.py --orbis_output_file "${{ env.DATA_DIR }}orbis_data_licensor_${{ steps.get-timestamp.outputs.timestamp }}.xlsx" --sec_output_file "${{ env.DATA_DIR }}company_facts_${{ steps.get-timestamp.outputs.timestamp }}_licensor.csv" --merged_output_file "${{ env.DATA_DIR }}merged_data_${{ steps.get-timestamp.outputs.timestamp }}_licensor.xlsx" --searched_raw_input_file "${{ env.DATA_DIR }}${{ steps.input-data-file-name.outputs.file_name }}"

      - name: Zip merged files
        run: |
          zip -r merged_data_${{ steps.get-timestamp.outputs.timestamp }}.zip ${{ env.DATA_DIR }}merged_data_${{ steps.get-timestamp.outputs.timestamp }}_licensee.xlsx ${{ env.DATA_DIR }}merged_data_${{ steps.get-timestamp.outputs.timestamp }}_licensor.xlsx
        continue-on-error: true

      - name: Upload merged files to artifacts
        uses: actions/upload-artifact@v3
        with:
          name: merged-data-${{ steps.get-timestamp.outputs.timestamp }}.zip
          path: merged_data_${{ steps.get-timestamp.outputs.timestamp }}.zip
          retention-days: ${{ env.RETENTION_PERIOD }}
        continue-on-error: true

      # If the notify input is true, send the merged files to the Slack channel
      # configured in SLACK_CHANNEL (the workflow defines no SLACK_DATA_CHANNEL).
      - name: Send data to slack channel
        if: ${{ success() && github.event.inputs.notify == 'true' }}
        run: |
          python utils/send_to_slack.py --file_path "${{ env.DATA_DIR }}merged_data_${{ steps.get-timestamp.outputs.timestamp }}_licensee.xlsx" --slack_channel ${{ env.SLACK_CHANNEL }} --message ' Prepared data from Orbis database search is done !. merged_data_${{ steps.get-timestamp.outputs.timestamp }}_licensee.xlsx is attached.'
          python utils/send_to_slack.py --file_path "${{ env.DATA_DIR }}merged_data_${{ steps.get-timestamp.outputs.timestamp }}_licensor.xlsx" --slack_channel ${{ env.SLACK_CHANNEL }} --message ' Prepared data from Orbis database search is done !. merged_data_${{ steps.get-timestamp.outputs.timestamp }}_licensor.xlsx is attached.'
        continue-on-error: true