Skip to content

Commit

Permalink
update pandas, airflow, etc (#316)
Browse files Browse the repository at this point in the history
  • Loading branch information
max-sixty authored Aug 28, 2019
1 parent 236f105 commit af1df69
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 25 deletions.
11 changes: 0 additions & 11 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -38,22 +38,11 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
netcat \
pkg-config \
proj-bin \
unixodbc \
unixodbc-dev \
unzip \
vim \
wget \
&& rm -rf /var/lib/apt/lists/*

# install mssql drivers https://msdn.microsoft.com/en-us/library/hh568454(v=sql.110).aspx
RUN curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add - \
&& curl https://packages.microsoft.com/config/debian/9/prod.list > /etc/apt/sources.list.d/mssql-release.list
RUN apt-get update && ACCEPT_EULA=Y apt-get install -y --no-install-recommends \
msodbcsql17 \
mssql-tools \
&& rm -rf /var/lib/apt/lists/*
RUN echo 'export PATH="$PATH:/opt/mssql-tools/bin"' >> ~/.bash_profile

# docker (simple version), from https://docs.docker.com/engine/installation/linux/ubuntu
RUN curl -fsSL get.docker.com | sh

Expand Down
19 changes: 16 additions & 3 deletions docker/airflow/airflow.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -593,9 +593,22 @@ api_rev = v3
hide_sensitive_variable_fields = True

[elasticsearch]
elasticsearch_host =
elasticsearch_log_id_template = {{dag_id}}-{{task_id}}-{{execution_date}}-{{try_number}}
elasticsearch_end_of_log_mark = end_of_log
# Elasticsearch host
host =
# Format of the log_id, which is used to query for a given task's logs
log_id_template = {{dag_id}}-{{task_id}}-{{execution_date}}-{{try_number}}
# Used to mark the end of a log stream for a task
end_of_log_mark = end_of_log
# Qualified URL for an elasticsearch frontend (like Kibana) with a template argument for log_id
# Code will construct log_id using the log_id template from the argument above.
# NOTE: The code will prefix the https:// automatically, don't include that here.
frontend =
# Write the task logs to the stdout of the worker, rather than the default files
write_stdout = False
# Instead of the default log formatter, write the log lines as JSON
json_format = False
# Log fields to also attach to the json output, if enabled
json_fields = asctime, filename, lineno, levelname, message

[kubernetes]
# The repository, tag and imagePullPolicy of the Kubernetes Image for the Worker to Run
Expand Down
25 changes: 20 additions & 5 deletions docker/airflow/airflow_local_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,12 @@

LOG_FORMAT = conf.get("core", "LOG_FORMAT")

COLORED_LOG_FORMAT = conf.get("core", "COLORED_LOG_FORMAT")

COLORED_LOG = conf.getboolean("core", "COLORED_CONSOLE_LOG")

COLORED_FORMATTER_CLASS = conf.get("core", "COLORED_FORMATTER_CLASS")

BASE_LOG_FOLDER = conf.get("core", "BASE_LOG_FOLDER")

PROCESSOR_LOG_FOLDER = conf.get("scheduler", "CHILD_PROCESS_LOG_DIRECTORY")
Expand All @@ -72,11 +78,17 @@
# just to help Airflow select correct handler
REMOTE_BASE_LOG_FOLDER = conf.get("core", "REMOTE_BASE_LOG_FOLDER")

ELASTICSEARCH_HOST = conf.get("elasticsearch", "ELASTICSEARCH_HOST")
ELASTICSEARCH_HOST = conf.get("elasticsearch", "HOST")

ELASTICSEARCH_LOG_ID_TEMPLATE = conf.get("elasticsearch", "LOG_ID_TEMPLATE")

ELASTICSEARCH_END_OF_LOG_MARK = conf.get("elasticsearch", "END_OF_LOG_MARK")

ELASTICSEARCH_WRITE_STDOUT = conf.get("elasticsearch", "WRITE_STDOUT")

LOG_ID_TEMPLATE = conf.get("elasticsearch", "ELASTICSEARCH_LOG_ID_TEMPLATE")
ELASTICSEARCH_JSON_FORMAT = conf.get("elasticsearch", "JSON_FORMAT")

END_OF_LOG_MARK = conf.get("elasticsearch", "ELASTICSEARCH_END_OF_LOG_MARK")
ELASTICSEARCH_JSON_FIELDS = conf.get("elasticsearch", "JSON_FIELDS")

DEFAULT_LOGGING_CONFIG = {
"version": 1,
Expand Down Expand Up @@ -195,10 +207,13 @@
"class": "airflow.utils.log.es_task_handler.ElasticsearchTaskHandler",
"formatter": "airflow",
"base_log_folder": os.path.expanduser(BASE_LOG_FOLDER),
"log_id_template": LOG_ID_TEMPLATE,
"log_id_template": ELASTICSEARCH_LOG_ID_TEMPLATE,
"filename_template": FILENAME_TEMPLATE,
"end_of_log_mark": END_OF_LOG_MARK,
"end_of_log_mark": ELASTICSEARCH_END_OF_LOG_MARK,
"host": ELASTICSEARCH_HOST,
"write_stdout": ELASTICSEARCH_WRITE_STDOUT,
"json_format": ELASTICSEARCH_JSON_FORMAT,
"json_fields": ELASTICSEARCH_JSON_FIELDS,
}
},
}
Expand Down
8 changes: 2 additions & 6 deletions docker/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ numexpr
numpy
pandas-datareader
pandas-gbq
pandas==0.25.0
pandas==0.25.1
python-dateutil
pytz
scikit-learn
Expand All @@ -39,7 +39,6 @@ dogpile.cache
lru-dict
lxml
pyarrow
pyodbc
pysftp
pyslack-real
quandl
Expand All @@ -50,10 +49,7 @@ slackclient<2.0.0
SQLAlchemy

# infrastructure
apache-airflow[celery,postgres,crypto]==1.10.3
# https://github.com/apache/airflow/pull/5535
# flask<1.10.0
werkzeug==0.15.0
apache-airflow[celery,postgres,crypto]==1.10.4
boto
celery
cookiecutter
Expand Down

0 comments on commit af1df69

Please sign in to comment.