commit 1b3f6693ac3d9be2efae52e40a53286e36e65079
Author: node_data
Date:   Sat Feb 1 14:33:43 2025 +0000

    feat: add initial

diff --git a/.env b/.env
new file mode 100644
index 0000000..22c64c2
--- /dev/null
+++ b/.env
@@ -0,0 +1,6 @@
+TAG=4.1.1
+POSTGRES_USER=superset
+POSTGRES_PASSWORD=f001f532a8aa7fd3238a1d661bce5180
+POSTGRES_DB=postgres
+METABASE_DB=superset
+
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..8fce603
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+data/
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..0a8e4f8
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,201 @@
+x-superset-image: &superset-image apachesuperset.docker.scarf.sh/apache/superset:${TAG:-latest-dev}
+x-superset-build:
+  &superset-build
+  context: docker
+x-superset-volumes:
+  &superset-volumes # /app/pythonpath_docker will be appended to the PYTHONPATH in the final container
+  - ./docker:/app/docker
+  - superset_home:/app/superset_home
+
+services:
+  redis:
+    image: redis:7
+    container_name: superset_cache
+    restart: unless-stopped
+    volumes:
+      - redis:/data
+
+  superset:
+    env_file:
+      - path: docker/.env # default
+        required: true
+      - path: docker/.env-local # optional override
+        required: false
+    build: *superset-build
+    command: ["run-server.sh"]
+    container_name: superset_app
+    user: "root"
+    restart: unless-stopped
+    ports:
+      - "8088:8088" # quoted so the mapping is always read as a string
+    depends_on:
+      superset-init:
+        condition: service_completed_successfully
+    volumes: *superset-volumes
+    environment:
+      SUPERSET_LOG_LEVEL: "${SUPERSET_LOG_LEVEL:-info}"
+
+  superset-init:
+    build: *superset-build
+    command: ["init_superset.sh"]
+    container_name: superset_init
+    env_file:
+      - path: docker/.env # default
+        required: true
+      - path: docker/.env-local # optional override
+        required: false
+    depends_on:
+      postgres:
+        condition: service_healthy # wait for the pg_isready healthcheck, not just container start
+      redis:
+        condition: service_started
+    user: "root"
+    volumes: *superset-volumes
+    healthcheck:
+      disable: true
+    environment:
+      SUPERSET_LOAD_EXAMPLES: "${SUPERSET_LOAD_EXAMPLES:-no}"
+      SUPERSET_LOG_LEVEL: "${SUPERSET_LOG_LEVEL:-info}"
+
+  superset-worker:
+    build: *superset-build
+    container_name: superset_worker
+    command:
+      - celery
+      - --app=superset.tasks.celery_app:app
+      - worker
+      - -O
+      - fair
+      - -l
+      - INFO
+      - --concurrency=2
+    env_file:
+      - path: docker/.env # default
+        required: true
+      - path: docker/.env-local # optional override
+        required: false
+    restart: unless-stopped
+    depends_on:
+      superset-init:
+        condition: service_completed_successfully
+    user: "root"
+    volumes: *superset-volumes
+    healthcheck:
+      test:
+        [
+          "CMD-SHELL",
+          "celery -A superset.tasks.celery_app:app inspect ping -d celery@$$HOSTNAME",
+        ]
+    environment:
+      SUPERSET_LOG_LEVEL: "${SUPERSET_LOG_LEVEL:-info}"
+
+  superset-worker-beat:
+    build: *superset-build
+    container_name: superset_worker_beat
+    command:
+      - celery
+      - --app=superset.tasks.celery_app:app
+      - beat
+      - --pidfile
+      - /tmp/celerybeat.pid
+      - -l
+      - INFO
+      - -s
+      - /tmp/celerybeat-schedule
+    env_file:
+      - path: docker/.env # default
+        required: true
+      - path: docker/.env-local # optional override
+        required: false
+    restart: unless-stopped
+    depends_on:
+      superset-init:
+        condition: service_completed_successfully
+    user: "root"
+    volumes: *superset-volumes
+    healthcheck:
+      disable: true
+    environment:
+      SUPERSET_LOG_LEVEL: "${SUPERSET_LOG_LEVEL:-info}"
+
+  postgres:
+    image: postgres:17-alpine
+    container_name: postgres
+    hostname: postgres
+    env_file:
+      - path: docker/.env # default
+        required: true
+      - path: docker/.env-local # optional override
+        required: false
+    environment:
+      POSTGRES_USER: ${POSTGRES_USER:-postgres}
+      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:?database password required}
+      POSTGRES_DB: ${POSTGRES_DB:-app}
+      PGDATA: /var/lib/postgresql/data/pgdata
+    volumes:
+      - postgres_data:/var/lib/postgresql/data/pgdata # Persistent data storage
+      - ./init:/docker-entrypoint-initdb.d/ # Initialization scripts
+      - ./backup:/backup # Backup location
+    healthcheck:
+      test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-postgres}"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+      start_period: 30s
+    shm_size: 256mb
+    command:
+      - "postgres"
+      - "-c"
+      - "max_connections=200"
+      - "-c"
+      - "shared_buffers=1GB"
+      - "-c"
+      - "effective_cache_size=3GB"
+      - "-c"
+      - "work_mem=16MB"
+      - "-c"
+      - "maintenance_work_mem=512MB"
+      - "-c"
+      - "random_page_cost=1.1"
+      - "-c"
+      - "temp_file_limit=10GB"
+      - "-c"
+      - "log_min_duration_statement=200ms"
+      - "-c"
+      - "idle_in_transaction_session_timeout=10s"
+      - "-c"
+      - "lock_timeout=1s"
+      - "-c"
+      - "statement_timeout=60s"
+      - "-c"
+      - "shared_preload_libraries=pg_stat_statements"
+      - "-c"
+      - "pg_stat_statements.max=10000"
+      - "-c"
+      - "pg_stat_statements.track=all"
+
+  pgbackups:
+    image: prodrigestivill/postgres-backup-local
+    container_name: postgres_backup
+    restart: unless-stopped
+    volumes:
+      - ./data/backup:/backups
+    environment:
+      - POSTGRES_HOST=postgres
+      - POSTGRES_DB=${POSTGRES_DB:-app}
+      - POSTGRES_USER=${POSTGRES_USER:-postgres}
+      - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:?database password required}
+      - SCHEDULE=@daily
+      - BACKUP_KEEP_DAYS=7
+      - BACKUP_KEEP_WEEKS=4
+      - BACKUP_KEEP_MONTHS=6
+    depends_on:
+      - postgres
+
+volumes:
+  postgres_data:
+    name: postgres_data
+  superset_home:
+    external: false
+  redis:
+    external: false
diff --git a/docker/.env b/docker/.env
new file mode 100644
index 0000000..9be6d73
--- /dev/null
+++ b/docker/.env
@@ -0,0 +1,64 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Allowing python to print() in docker
+PYTHONUNBUFFERED=1
+
+COMPOSE_PROJECT_NAME=superset
+DEV_MODE=true
+
+# database configurations (do not modify)
+DATABASE_DB=superset
+DATABASE_HOST=postgres
+# Make sure you set this to a unique secure random value on production
+DATABASE_PASSWORD=f001f532a8aa7fd3238a1d661bce5180
+DATABASE_USER=superset
+
+SUPERSET_LOAD_EXAMPLES=false
+
+# database engine specific environment variables
+# change the below if you prefer another database engine
+DATABASE_PORT=5432
+DATABASE_DIALECT=postgresql
+POSTGRES_DB=superset
+POSTGRES_USER=superset
+# Make sure you set this to a unique secure random value on production
+POSTGRES_PASSWORD=f001f532a8aa7fd3238a1d661bce5180
+#MYSQL_DATABASE=superset
+#MYSQL_USER=superset
+#MYSQL_PASSWORD=superset
+#MYSQL_RANDOM_ROOT_PASSWORD=yes
+
+# Add the mapped in /app/pythonpath_docker which allows devs to override stuff
+PYTHONPATH=/app/pythonpath:/app/docker/pythonpath_dev
+REDIS_HOST=redis
+REDIS_PORT=6379
+
+FLASK_DEBUG=true
+SUPERSET_ENV=development
+# SUPERSET_LOAD_EXAMPLES is defined once, above; a duplicate "yes" here silently overrode it
+CYPRESS_CONFIG=false
+SUPERSET_PORT=8088
+MAPBOX_API_KEY=''
+
+# Make sure you set this to a unique secure random value on production
+SUPERSET_SECRET_KEY=TEST_NON_DEV_SECRET
+
+ENABLE_PLAYWRIGHT=false
+PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true
+BUILD_SUPERSET_FRONTEND_IN_DOCKER=true
+SUPERSET_LOG_LEVEL=info
diff --git a/docker/Dockerfile b/docker/Dockerfile
new file mode 100644
index 0000000..4e8e3d8
--- /dev/null
+++ b/docker/Dockerfile
@@ -0,0 +1,16 @@
+FROM apache/superset:4.1.1
+
+USER root
+
+RUN mkdir -p /app/pythonpath
+
+RUN apt-get update && apt-get install -y libpq-dev gcc && \
+    pip install --no-cache-dir psycopg2 clickhouse-connect && rm -rf /var/lib/apt/lists/*
+
+
+# Copy the Superset config and the init script into the container
+COPY superset_config.py /app/pythonpath/
+COPY init_superset.sh /usr/bin/init_superset.sh
+
+# Make sure the script is executable
+RUN chmod +x /usr/bin/init_superset.sh
diff --git a/docker/README.md b/docker/README.md
new file mode 100644
index 0000000..be29bbe
--- /dev/null
+++ b/docker/README.md
@@ -0,0 +1,75 @@
+
+
+# Getting Started with Superset using Docker
+
+Docker is an easy way to get started with Superset.
+
+## Prerequisites
+
+1. [Docker](https://www.docker.com/get-started)
+2. [Docker Compose](https://docs.docker.com/compose/install/)
+
+## Configuration
+
+The `/app/pythonpath` folder is mounted from [`./docker/pythonpath_dev`](./pythonpath_dev)
+which contains a base configuration [`./docker/pythonpath_dev/superset_config.py`](./pythonpath_dev/superset_config.py)
+intended for use with local development.
+
+### Local overrides
+
+In order to override configuration settings locally, simply make a copy of [`./docker/pythonpath_dev/superset_config_local.example`](./pythonpath_dev/superset_config_local.example)
+into `./docker/pythonpath_dev/superset_config_docker.py` (git ignored) and fill in your overrides.
+
+### Local packages
+
+If you want to add Python packages in order to test things like databases locally, you can simply add a local requirements.txt (`./docker/requirements-local.txt`)
+and rebuild your Docker stack.
+
+Steps:
+
+1. Create `./docker/requirements-local.txt`
+2. Add your new packages
+3. Rebuild docker compose
+   1. `docker compose down -v`
+   2. `docker compose up`
+
+## Initializing Database
+
+The database will initialize itself upon startup via the init container ([`superset-init`](./docker-init.sh)). This may take a minute.
+
+## Normal Operation
+
+To run the container, simply run: `docker compose up`
+
+After waiting several minutes for Superset initialization to finish, you can open a browser and view [`http://localhost:8088`](http://localhost:8088)
+to start your journey.
+
+## Developing
+
+While running, the container server will reload on modification of the Superset Python and JavaScript source code.
+Don't forget to reload the page to take the new frontend into account though.
+
+## Production
+
+It is possible to run Superset in non-development mode by using [`docker-compose-non-dev.yml`](../docker-compose-non-dev.yml). This file excludes the volumes needed for development and uses [`./docker/.env-non-dev`](./.env-non-dev) which sets the variable `SUPERSET_ENV` to `production`.
+
+## Resource Constraints
+
+If you are attempting to build on macOS and the build exits with code 137, you need to increase your Docker resources. See the instructions [here](https://docs.docker.com/docker-for-mac/#advanced) (search for "memory").
diff --git a/docker/apt-install.sh b/docker/apt-install.sh
new file mode 100755
index 0000000..bd9152b
--- /dev/null
+++ b/docker/apt-install.sh
@@ -0,0 +1,51 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+set -euo pipefail
+
+# Ensure this script is run as root
+if [[ $EUID -ne 0 ]]; then
+  echo "This script must be run as root" >&2
+  exit 1
+fi
+
+# At least one package name is required; every argument is passed to apt-get install
+if [[ $# -lt 1 ]]; then
+  echo "Usage: $0 <package1> [<package2> ...]" >&2
+  exit 1
+fi
+
+# Colors for better logging (optional; RED is currently unused)
+GREEN='\033[0;32m'
+RED='\033[0;31m'
+RESET='\033[0m'
+
+# Install packages, then remove everything that only the install step needed
+echo -e "${GREEN}Updating package lists...${RESET}"
+apt-get update -qq
+
+echo -e "${GREEN}Installing packages: $*${RESET}"
+apt-get install -yqq --no-install-recommends "$@"
+
+echo -e "${GREEN}Autoremoving unnecessary packages...${RESET}"
+apt-get autoremove -y
+
+echo -e "${GREEN}Cleaning up package cache and metadata...${RESET}"
+apt-get clean
+rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/* /tmp/* /var/tmp/*
+
+echo -e "${GREEN}Installation and cleanup complete.${RESET}"
diff --git a/docker/docker-bootstrap.sh b/docker/docker-bootstrap.sh
new file mode 100755
index 0000000..1f7f17b
--- /dev/null
+++ b/docker/docker-bootstrap.sh
@@ -0,0 +1,77 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+set -eo pipefail
+
+# In dev mode, reinstall the app in editable mode (only when uv is available)
+if [ "$DEV_MODE" == "true" ]; then
+    if command -v uv > /dev/null 2>&1; then
+        echo "Reinstalling the app in editable mode"
+        uv pip install -e .
+    fi
+fi
+REQUIREMENTS_LOCAL="/app/docker/requirements-local.txt"
+# If Cypress run – overwrite the password for admin and export env variables
+if [ "$CYPRESS_CONFIG" == "true" ]; then
+    export SUPERSET_CONFIG=tests.integration_tests.superset_test_config
+    export SUPERSET_TESTENV=true
+    export SUPERSET__SQLALCHEMY_DATABASE_URI=postgresql+psycopg2://superset:superset@db:5432/superset
+fi
+if [[ "$DATABASE_DIALECT" == postgres* ]] ; then
+    echo "Installing postgres requirements"
+    if command -v uv > /dev/null 2>&1; then
+        # Use uv in newer images
+        uv pip install -e .[postgres]
+    else
+        # Use pip in older images
+        pip install -e .[postgres]
+    fi
+fi
+# Make sure we have dev requirements installed. Prefer uv (newer images) and
+# fall back to pip (older images), mirroring the postgres step above.
+if command -v uv > /dev/null 2>&1; then PIP_INSTALL="uv pip install"; else PIP_INSTALL="pip install"; fi
+if [ -f "${REQUIREMENTS_LOCAL}" ]; then
+  echo "Installing local overrides at ${REQUIREMENTS_LOCAL}"
+  ${PIP_INSTALL} --no-cache-dir -r "${REQUIREMENTS_LOCAL}"  # unquoted on purpose: splits into command + args
+else
+  echo "Skipping local overrides"
+fi
+
+case "${1}" in
+  worker)
+    echo "Starting Celery worker..."
+    # setting up only 2 workers by default to contain memory usage in dev environments
+    celery --app=superset.tasks.celery_app:app worker -O fair -l INFO --concurrency=${CELERYD_CONCURRENCY:-2}
+    ;;
+  beat)
+    echo "Starting Celery beat..."
+    rm -f /tmp/celerybeat.pid
+    celery --app=superset.tasks.celery_app:app beat --pidfile /tmp/celerybeat.pid -l INFO -s "${SUPERSET_HOME}"/celerybeat-schedule
+    ;;
+  app)
+    echo "Starting web app (using development server)..."
+    flask run -p 8088 --with-threads --reload --debugger --host=0.0.0.0
+    ;;
+  app-gunicorn)
+    echo "Starting web app..."
+    /usr/bin/run-server.sh
+    ;;
+  *)  # also reached when the script is invoked without an argument
+    echo "Unknown Operation!!!"
+    ;;
+esac
diff --git a/docker/docker-entrypoint-initdb.d/examples-init.sh b/docker/docker-entrypoint-initdb.d/examples-init.sh
new file mode 100755
index 0000000..bed1543
--- /dev/null
+++ b/docker/docker-entrypoint-initdb.d/examples-init.sh
@@ -0,0 +1,34 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# ------------------------------------------------------------------------
+# Creates the examples user and database, then grants that user the public
+# schema. Names/credentials come from the EXAMPLES_* environment variables.
+# ------------------------------------------------------------------------
+set -e
+
+psql -v ON_ERROR_STOP=1 --username "${POSTGRES_USER}" <<-EOSQL
+  CREATE USER ${EXAMPLES_USER} WITH PASSWORD '${EXAMPLES_PASSWORD}';
+  CREATE DATABASE ${EXAMPLES_DB};
+  GRANT ALL PRIVILEGES ON DATABASE ${EXAMPLES_DB} TO ${EXAMPLES_USER};
+EOSQL
+
+psql -v ON_ERROR_STOP=1 --username "${POSTGRES_USER}" -d "${EXAMPLES_DB}" <<-EOSQL
+  GRANT ALL ON SCHEMA public TO ${EXAMPLES_USER};
+EOSQL
diff --git a/docker/docker-frontend.sh b/docker/docker-frontend.sh
new file mode 100755
index 0000000..f851576
--- /dev/null
+++ b/docker/docker-frontend.sh
@@ -0,0 +1,44 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+set -e
+
+# Packages needed for puppeteer (only when PUPPETEER_SKIP_CHROMIUM_DOWNLOAD is "false"):
+if [ "$PUPPETEER_SKIP_CHROMIUM_DOWNLOAD" = "false" ]; then
+    apt update
+    apt install -y chromium
+fi
+
+if [ "$BUILD_SUPERSET_FRONTEND_IN_DOCKER" = "true" ]; then
+    echo "Building Superset frontend in dev mode inside docker container"
+    cd /app/superset-frontend
+
+    if [ "$NPM_RUN_PRUNE" = "true" ]; then
+        echo 'Running `npm run prune`'  # single quotes: print the backticks, do not run the command
+        npm run prune
+    fi
+
+    echo 'Running `npm install`'  # single quotes: print the backticks, do not run the command
+    npm install
+
+    echo "Start webpack dev server"
+    npm run dev
+
+else
+    echo "Skipping frontend build steps - YOU NEED TO RUN IT MANUALLY ON THE HOST!"
+    echo "https://superset.apache.org/docs/contributing/development/#webpack-dev-server"
+fi
diff --git a/docker/docker-init.sh b/docker/docker-init.sh
new file mode 100755
index 0000000..0f93058
--- /dev/null
+++ b/docker/docker-init.sh
@@ -0,0 +1,82 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+set -e
+
+#
+# Always install local overrides first
+#
+/app/docker/docker-bootstrap.sh
+
+if [ "$SUPERSET_LOAD_EXAMPLES" = "yes" ]; then
+    STEP_CNT=4
+else
+    STEP_CNT=3
+fi
+
+echo_step() {
+cat <