airflow源码精读 二

Building the image 构建镜像

Installing from Docker Images

https://airflow.apache.org/docs/docker-stack/index.html

# apt安装 包
FROM apache/airflow:2.5.1
USER root
RUN apt-get update \
  && apt-get install -y --no-install-recommends \
         vim \
  && apt-get autoremove -yqq --purge \
  && apt-get clean \
  && rm -rf /var/lib/apt/lists/*
USER airflow

# pypi 安装包
FROM apache/airflow:2.5.1
RUN pip install --no-cache-dir lxml

# 用requirements.txt安装包
FROM apache/airflow:2.5.1
COPY requirements.txt /
RUN pip install --no-cache-dir -r /requirements.txt

# Embedding DAGs 嵌入加载dag
FROM apache/airflow:2.5.1
# 把test_dag.py以用户airflow 用户组root copy到镜像/opt/airflow/dags中
COPY --chown=airflow:root test_dag.py /opt/airflow/dags

test_dag.py如下

import datetime

import pendulum

from airflow.models.dag import DAG
from airflow.operators.empty import EmptyOperator

now = pendulum.now(tz="UTC")
now_to_the_hour = (now - datetime.timedelta(0, 0, 0, 0, 0, 3)).replace(minute=0, second=0, microsecond=0)
START_DATE = now_to_the_hour
DAG_NAME = "test_dag_v1"

dag = DAG(
    DAG_NAME,
    schedule="*/10 * * * *",
    default_args={"depends_on_past": True},
    start_date=pendulum.datetime(2021, 1, 1, tz="UTC"),
    catchup=False,
)

run_this_1 = EmptyOperator(task_id="run_this_1", dag=dag)
run_this_2 = EmptyOperator(task_id="run_this_2", dag=dag)
run_this_2.set_upstream(run_this_1)
run_this_3 = EmptyOperator(task_id="run_this_3", dag=dag)
run_this_3.set_upstream(run_this_2)

启用Buildkit来构建映像

DOCKER_BUILDKIT=1

Using docker-context-files

build镜像时需要添加参数--build-arg DOCKER_CONTEXT_FILES=docker-context-files

mkdir -p docker-context-files

cat <./docker-context-files/requirements.txt
beautifulsoup4==4.10.0
EOF

export DOCKER_BUILDKIT=1
docker build . \
    --build-arg DOCKER_CONTEXT_FILES=./docker-context-files \
    --tag "my-beautifulsoup4-airflow:0.0.1"
docker run -it my-beautifulsoup4-airflow:0.0.1 python -c 'import bs4; import sys; sys.exit(0)' && \
    echo "Success! Beautifulsoup4 installed" && echo

比如配置pip.conf

mkdir -p docker-context-files

cat <./docker-context-files/pip.conf
[global]
verbose = 2
EOF

export DOCKER_BUILDKIT=1
docker build . \
    --build-arg DOCKER_CONTEXT_FILES=./docker-context-files \
    --tag "my-custom-pip-verbose-airflow:0.0.1"
docker run -it my-beautifulsoup4-airflow:0.0.1 python -c 'import bs4; import sys; sys.exit(0)' && \
    echo "Success! Beautifulsoup4 installed" && echo

替换.whl

mkdir -p docker-context-files
export AIRFLOW_VERSION="2.2.4"
rm docker-context-files/*.whl docker-context-files/*.tar.gz docker-context-files/*.txt || true

curl -Lo "docker-context-files/constraints-3.7.txt" \
    "https://raw.githubusercontent.com/apache/airflow/constraints-${AIRFLOW_VERSION}/constraints-3.7.txt"

echo
echo "Make sure you use the right python version here (should be same as in constraints)!"
echo
python --version

pip download --dest docker-context-files \
    --constraint docker-context-files/constraints-3.7.txt  \
    "apache-airflow[async,celery,elasticsearch,kubernetes,postgres,redis,ssh,statsd,virtualenv]==${AIRFLOW_VERSION}"

添加extra

export AIRFLOW_VERSION=2.3.4
export DOCKER_BUILDKIT=1

docker build . \
    --pull \
    --build-arg PYTHON_BASE_IMAGE="python:3.8-slim-bullseye" \
    --build-arg AIRFLOW_VERSION="${AIRFLOW_VERSION}" \
    --build-arg ADDITIONAL_AIRFLOW_EXTRAS="mssql,hdfs" \
    --build-arg ADDITIONAL_PYTHON_DEPS="oauth2client" \
    --tag "my-pypi-extras-and-deps:0.0.1"
    
    
export AIRFLOW_VERSION=2.2.4
export DOCKER_BUILDKIT=1

docker build . \
    --pull \
    --build-arg PYTHON_BASE_IMAGE="python:3.7-slim-bullseye" \
    --build-arg AIRFLOW_VERSION="${AIRFLOW_VERSION}" \
    --build-arg ADDITIONAL_PYTHON_DEPS="mpi4py" \
    --build-arg ADDITIONAL_DEV_APT_DEPS="libopenmpi-dev" \
    --build-arg ADDITIONAL_RUNTIME_APT_DEPS="openmpi-common" \
    --tag "my-build-essential-image:0.0.1"    

通过github

export DOCKER_BUILDKIT=1

docker build . \
    --pull \
    --build-arg PYTHON_BASE_IMAGE="python:3.7-slim-bullseye" \
    --build-arg AIRFLOW_INSTALLATION_METHOD="https://github.com/apache/airflow/archive/main.tar.gz#egg=apache-airflow" \
    --build-arg AIRFLOW_CONSTRAINTS_REFERENCE="constraints-main" \
    --tag "my-github-main:0.0.1"

Build args参考

https://airflow.apache.org/docs/docker-stack/build-arg-ref.html

你可能感兴趣的:(调度系统,python)