diff -Nru charliecloud-0.36/README.rst charliecloud-0.37/README.rst --- charliecloud-0.36/README.rst 2024-01-19 20:44:04.000000000 +0000 +++ charliecloud-0.37/README.rst 2024-02-29 20:52:27.000000000 +0000 @@ -46,8 +46,8 @@ detail the motivation for Charliecloud and the technology upon which it is based: https://www.usenix.org/publications/login/fall2017/priedhorsky -* A more technical resource is our Supercomputing 2017 paper: - https://dl.acm.org/citation.cfm?id=3126925 +* For technical papers about Charliecloud, refer to the *Technical + publications* section below. Who is responsible? ------------------- @@ -92,8 +92,8 @@ We are friendly and welcoming of diversity on all dimensions. -How do I cite Charliecloud? ---------------------------- +Technical publications +---------------------- If Charliecloud helped your research, or it was useful to you in any other context where bibliographic citations are appropriate, please cite the @@ -106,6 +106,35 @@ *Note:* This paper contains an out-of-date number for the size of Charliecloud’s code. Please instead use the current number in the FAQ. +Other publications: + +* We compare the performance of three HPC-specific container technologies + against bare metal, finding no concerns about performance degradation. + + Alfred Torrez, Tim Randles, and Reid Priedhorsky. “HPC container runtimes + have minimal or no performance impact”, 2019. In *Proc. CANOPIE HPC + Workshop @ SC*. DOI: `10.1109/CANOPIE-HPC49598.2019.00010 <https://doi.org/10.1109/CANOPIE-HPC49598.2019.00010>`_. + +* A demonstration of how low-privilege containers meet increasing demand for + software flexibility. + + Reid Priedhorsky, R. Shane Canon, Timothy Randles, and Andrew J. Younge. + “Minimizing privilege for building HPC containers”, 2021. In *Proc. + Supercomputing*. DOI: `10.6084/m9.figshare.14396099 <https://doi.org/10.6084/m9.figshare.14396099>`_. + +* Charliecloud’s build cache performs competitively with the standard + many-layered union filesystem approach and has structural advantages + including a better diff format, lower cache overhead, and better file + de-duplication. + + Reid Priedhorsky, Jordan Ogas, Claude H. (Rusty) Davis IV, Z. Noah + Hounshel, Ashlyn Lee, Benjamin Stormer, and R. Shane Goff. “Charliecloud’s + layer-free, Git-based container build cache”, 2023. In *Proc. + Supercomputing*. DOI: `10.1145/3624062.3624585 <https://doi.org/10.1145/3624062.3624585>`_. + Copyright and license --------------------- @@ -134,4 +163,4 @@ from LANL. -.. LocalWords: USENIX's CNA Meisam +.. LocalWords: USENIX's CNA Meisam figshare diff -Nru charliecloud-0.36/VERSION charliecloud-0.37/VERSION --- charliecloud-0.36/VERSION 2024-01-19 20:44:04.000000000 +0000 +++ charliecloud-0.37/VERSION 2024-02-29 20:52:27.000000000 +0000 @@ -1 +1 @@ -0.36 +0.37 diff -Nru charliecloud-0.36/bin/ch-completion.bash charliecloud-0.37/bin/ch-completion.bash --- charliecloud-0.36/bin/ch-completion.bash 2024-01-19 20:44:04.000000000 +0000 +++ charliecloud-0.37/bin/ch-completion.bash 2024-02-29 20:52:27.000000000 +0000 @@ -6,9 +6,16 @@ # shellcheck disable=SC2207 # SC2034 complains about modifying variables by reference in -# _ch_run_image_finder. Disable it. +# _ch_run_parse. Disable it. # shellcheck disable=SC2034 +# Permissions for this file: +# +# This file needs to be sourced, not executed. Because of this, the execute bit +# for the file should remain unset for all permission groups.
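+# +# (An illustrative aside, not a step any install recipe here performs: a packager might ship this file with “install -m 0644 ch-completion.bash DEST”, or clear a stray execute bit with “chmod a-x ch-completion.bash”; DEST is a hypothetical target path.)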
+# +# (sourcing versus executing: https://superuser.com/a/176788) + # Resources for understanding this script: # # * Everything bash: @@ -29,11 +36,11 @@ ## SYNTAX GLOSSARY ## # -# This script uses syntax that may be confusing for bash newbies and those who -# are rusty. -# -# Source: -# https://www.gnu.org/software/bash/manual/html_node/Shell-Parameter-Expansion.html +# Bash has some pretty unusual syntax, and this script has no shortage of +# strange Bash-isms. I’m including this syntax glossary with the hope that it’ll +# make this code more readable for Bash newbies and those who are rusty. For more +# info, see the gnu.org “Bash parameter expansion” page linked above, which is also +# the source for this glossary. # # ${array[i]} # Gives the ith element of “array”. Note that bash arrays are indexed at @@ -72,9 +79,23 @@ # a b c # $ echo ${foo[@]:1:3} # b c d +# +# ${parameter/pattern/string} +# This is a form of pattern replacement in which “parameter” is expanded and +# the first instance of “pattern” is replaced with “string”. +# +# ${parameter//pattern/string} +# Similar to “${parameter/pattern/string}” above, except every instance of +# “pattern” in the expanded parameter is replaced by “string” instead of only +# the first. +# + + +## Setup ## -# According to this (https://stackoverflow.com/a/50281697) post, bash 4.3 alpha -# added the feature used in this script to pass a variable by ref. +# According to this post (https://stackoverflow.com/a/50281697), Bash 4.3 alpha +# added the feature that enables the use of out parameters for functions (or +# passing variables by reference), which is an integral feature of this script. bash_vmin=4.3.0 # Check Bash version @@ -96,11 +117,19 @@ fi fi -# Debugging log -if [[ -f "/tmp/ch-completion.log" && -n "$CH_COMPLETION_DEBUG" ]]; then - printf "completion log\n\n" >> /tmp/ch-completion.log +# https://stackoverflow.com/a/246128 +_ch_completion_dir=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +_ch_completion_version="$("$_ch_completion_dir"/../misc/version)" + +_ch_completion_log="/tmp/ch-completion.log" + +# Record file being sourced. 
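+# (Debug logging is opt-in. A sketch of enabling it, using the log path set above and the variable tested just below: run “export CH_COMPLETION_DEBUG=yes”, then source this file; messages accumulate in /tmp/ch-completion.log.)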
+if [[ -n "$CH_COMPLETION_DEBUG" ]]; then + printf "ch-completion.bash sourced\n\n" >> "$_ch_completion_log" fi +_ch_completable_executables="ch-image ch-run ch-convert" + ## ch-convert ## @@ -140,11 +169,12 @@ _DEBUG " output format: $fmt_out" if [[ $opts_end != -1 ]]; then _DEBUG " input image: ${words[$opts_end]}" + else + _DEBUG " input image:" fi # Command line options if [[ ($opts_end == -1) || ($cword -lt $opts_end) ]]; then - _DEBUG "GOT HERE" case "$prev" in -i|--in-fmt) COMPREPLY=( $(compgen -W "${_convert_fmts//$fmt_out/}" -- "$cur") ) @@ -184,6 +214,7 @@ if [[ -n "$(compgen -d -- "$cur")" ]]; then compopt -o nospace fi + return 0 ;; squash) COMPREPLY+=( $(_compgen_filepaths -X "!*.sqfs" "$cur") ) @@ -202,6 +233,7 @@ COMPREPLY+=( $(_compgen_filepaths -X "!*.tar.* !*tgz !*.sqfs" "$cur") ) COMPREPLY+=( $(compgen -W "$(_ch_list_images "$strg_dir")" -- "$cur") ) _space_filepath -X "!*.tar.* !*tgz !*.sqfs" "$cur" + __ltrim_colon_completions "$cur" return 0 ;; esac @@ -227,8 +259,8 @@ _image_build_opts="-b --bind --build-arg -f --file --force --force-cmd -n --dry-run --parse-only -t --tag" -_image_common_opts="-a --arch --always-download --auth --cache - --cache-large --dependencies -h --help +_image_common_opts="-a --arch --always-download --auth --break + --cache --cache-large --dependencies -h --help --no-cache --no-lock --no-xattrs --profile --rebuild --password-many -q --quiet -s --storage --tls-no-verify -v --verbose --version --xattrs" @@ -278,6 +310,14 @@ COMPREPLY=( $(compgen -W "host yolo $_archs" -- "$cur") ) return 0 ;; + --break) + # “--break” arguments take the form “MODULE:LINE”. Complete “MODULE:” + # from python files in lib (we can’t complete line number). + COMPREPLY=( $(compgen -S : -W "$(_compgen_py_libfiles)" -- "$cur") ) + __ltrim_colon_completions + compopt -o nospace + return 0 + ;; --cache-large) # This is just a user-specified number. Can’t autocomplete COMPREPLY=() @@ -416,7 +456,7 @@ strg_dir=$(_ch_find_storage "${words[@]::$cword}" "${words[@]:$cword+1:${#array[@]}-1}") local cli_image local cmd_index=-1 - _ch_run_image_finder "$strg_dir" "$cword" cli_image cmd_index "${words[@]}" + _ch_run_parse "$strg_dir" "$cword" cli_image cmd_index "${words[@]}" # Populate debug log _DEBUG "\$ ${words[*]}" @@ -512,18 +552,70 @@ ## Helper functions ## +_ch_completion_help="Usage: ch-completion [ OPTION ] + +Utility function for Charliecloud tab completion. + + --disable disable tab completion for all Charliecloud executables + --help show this help message + --version check tab completion script version + --version-ok check version compatibility between tab completion and Charliecloud + executables +" + # Add debugging text to log file if CH_COMPLETION_DEBUG is specified. _DEBUG () { if [[ -n "$CH_COMPLETION_DEBUG" ]]; then - echo "$@" >> /tmp/ch-completion.log + #echo "$@" >> "$_ch_completion_log" + printf "%s\n" "$@" >> "$_ch_completion_log" fi } -# Disable completion. -ch-completion-disable () { - complete -r ch-convert - complete -r ch-image - complete -r ch-run +# Utility function for Charliecloud tab completion that’s available to users. 
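+# (Expected interactive usage, inferred from the option handling below — e.g. “ch-completion --version-ok” prints “version ok” and returns 0 when the sourced completion version matches “ch-image --version”, and prints both versions and returns 1 otherwise.)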
+ch-completion () { + while true; do + case $1 in + --disable) + complete -r ch-convert + complete -r ch-image + complete -r ch-run + ;; + --help) + printf "%s" "$_ch_completion_help" 1>&2 + return 0 + ;; + --version) + printf "%s\n" "$_ch_completion_version" 1>&2 + ;; + --version-ok) + if _version_ok_ch_completion "ch-image"; then + printf "version ok\n" 1>&2 + return 0 + else + printf "ch-image: %s\n" "$(ch-image --version)" 1>&2 + printf "ch-completion: %s\n" "$_ch_completion_version" 1>&2 + printf "version incompatible!\n" 1>&2 + return 1 + fi + ;; + *) + break + ;; + esac + shift + done +} + +_completion_opts="--disable --help --version --version-ok" + +# Yes, the utility function needs completion too... +# +_ch_completion_complete () { + local cur + _get_comp_words_by_ref -n : cur + + COMPREPLY=( $(compgen -W "$_completion_opts" -- "$cur") ) + return 0 } # Parser for ch-convert command line. Takes 6 arguments: @@ -631,15 +723,6 @@ fi } -# List images in storage directory. -_ch_list_images () { - # “find” throws an error if “img” subdir doesn't exist or is empty, so check - # before proceeding. - if [[ -d "$1/img" && -n "$(ls -A "$1/img")" ]]; then - find "$1/img/"* -maxdepth 0 -printf "%f\n" | sed -e 's|+|:|g' -e 's|%|/|g' - fi -} - # Print the subcommand in an array of words; if there is not one, print an empty # string. This feels a bit kludge-y, but it's the best I could come up with. # It's worth noting that the double for loop doesn't take that much time, since @@ -671,6 +754,15 @@ echo "$subcmd" } +# List images in storage directory. +_ch_list_images () { + # “find” throws an error if “img” subdir doesn't exist or is empty, so check + # before proceeding. + if [[ -d "$1/img" && -n "$(ls -A "$1/img")" ]]; then + find "$1/img/"* -maxdepth 0 -printf "%f\n" | sed -e 's|+|:|g' -e 's|%|/|g' + fi +} + # Horrible, disgusting function to find an image or image ref in the ch-run # command line. This function takes five arguments: # # 1.) The storage directory. # # 2.) The current index of the cursor in the word array # representing the command line (index starting at 0). # # 3.) An out parameter (see explanation above “_ch_convert_parse”). If -# “_ch_run_image_finder” finds the name of an image in storage (e.g. +# “_ch_run_parse” finds the name of an image in storage (e.g. # “alpine:latest”) or something that looks like an image path (i.e. a # directory, tarball or file named like a squash archive) in the command # line, the value of the variable will be updated to the image name or # path. Otherwise, the value is unchanged. # # 4.) Another out parameter. If this function finds “--” in the current # command line and it doesn't seem like the user is trying to complete -# that “--” to an option, “_ch_run_image_finder” will assume that this is -# the point beyond which the user specifies commands to be run inside the +# that “--” to an option, “_ch_run_parse” will assume that this is the +# point beyond which the user specifies commands to be run inside the # container and will give the variable the index value of the “--”. Our # criterion for deciding that the user isn't trying to complete “--” to an # option is that the current index of the cursor in the word array @@ -700,7 +792,7 @@ # 5.) A string representing the expanded command line array (i.e. # "${array[@]}"). # -_ch_run_image_finder () { +_ch_run_parse () { # The essential purpose of this function is to try to find an image in the # current command line. If it finds one, it passes the “name” of the image # back to the caller in the form of an out parameter (see above).
If it @@ -744,11 +836,9 @@ fi # Check for refs to images in storage. if [[ -z $cli_img ]]; then - for img in $images; do - if [[ ${wrds[$ct]} == "$img" ]]; then - cli_img="${wrds[$ct]}" - fi - done + if _is_subword "${wrds[$ct]}" "$images"; then + cli_img="${wrds[$ct]}" + fi fi fi ((ct++)) @@ -805,6 +895,13 @@ compgen -d -S / -- "$cur" } +# Wrapper for a horrible pipeline to complete python files in lib. +_compgen_py_libfiles () { + compgen -f "$_ch_completion_dir/../lib/" | + grep -o -E ".*\.py" | + sed "s|$_ch_completion_dir\/\.\.\/lib\/\(.*\)\.py|\1|" +} + # Return 0 if "$1" is a word in space-separated sequence of words "$2", e.g. # # >>> _is_subword "foo" "foo bar baz" @@ -824,22 +921,6 @@ return 1 } -# Wrapper for some tricky logic that determines whether or not to add a space at -# the end of a path completion. For the sake of convenience we want to avoid -# adding a space at the end if the completion is a directory path, because we -# don’t know if the user is looking for the completed directory or one of its -# subpaths (we may be able to figure this out in some cases, but I’m not gonna -# worry about that now). We *do* want to add a space at the end if the -# completion is the path to a file. -_space_filepath () { - local files - files="$(_compgen_filepaths "$1" "$2" "$3")" - if [[ (-n "$files") \ - && (! -f "$(_sanitized_tilde_expand "$files")") ]]; then - compopt -o nospace - fi -} - # Expand tilde in quoted strings to the correct home path, if applicable, while # sanitizing to prevent code injection (see https://stackoverflow.com/a/38037679). # @@ -865,6 +946,31 @@ echo "$1" } +# Wrapper for some tricky logic that determines whether or not to add a space at +# the end of a path completion. For the sake of convenience we want to avoid +# adding a space at the end if the completion is a directory path, because we +# don’t know if the user is looking for the completed directory or one of its +# subpaths (we may be able to figure this out in some cases, but I’m not gonna +# worry about that now). We *do* want to add a space at the end if the +# completion is the path to a file. +_space_filepath () { + local files + files="$(_compgen_filepaths "$1" "$2" "$3")" + if [[ (-n "$files") \ + && (! -f "$(_sanitized_tilde_expand "$files")") ]]; then + compopt -o nospace + fi +} + +_version_ok_ch_completion () { + if [[ "$($1 --version 2>&1)" == "$_ch_completion_version" ]]; then + return 0 + else + return 1 + fi +} + +complete -F _ch_completion_complete ch-completion complete -F _ch_convert_complete ch-convert complete -F _ch_image_complete ch-image complete -F _ch_run_complete ch-run diff -Nru charliecloud-0.36/bin/ch-fromhost charliecloud-0.37/bin/ch-fromhost --- charliecloud-0.36/bin/ch-fromhost 2024-01-19 20:44:04.000000000 +0000 +++ charliecloud-0.37/bin/ch-fromhost 2024-02-29 20:52:27.000000000 +0000 @@ -4,10 +4,10 @@ # source:destination pairs separated by newlines, then walk through them and # copy them into the image. # -# The colon separator is to avoid the difficulty of iterating through a sequence -# of pairs with no arrays or structures in POSIX sh. We could avoid it by -# taking action immediately upon encountering each file in the argument list, -# but that would (a) yield a half-injected image for basic errors like +# The colon separator is to avoid the difficulty of iterating through a +# sequence of pairs with no arrays or structures in POSIX sh. 
We could avoid +# it by taking action immediately upon encountering each file in the argument +# list, but that would (a) yield a half-injected image for basic errors like # misspellings on the command line and (b) would require the image to be first # on the command line, which seems awkward. # @@ -62,11 +62,12 @@ Options: - --print-fi print inferred destination for libfabric provider(s) --print-cray-fi print inferred destination for libfabric replacement + --print-fi print inferred destination for libfabric provider(s) --print-lib print inferred destination for shared libraries --no-ldconfig don’t run ldconfig even if we injected shared libraries -h, --help print this help and exit + -q, --quiet make the program more quiet, can be repeated -v, --verbose list the injected files --version print version and exit @@ -92,29 +93,8 @@ print_lib_dest= no_ldconfig= -debug () { - if [ "$verbose" = 'yes' ]; then - printf '[ debug ] %s\n' "$1" 1>&2 - fi -} - -debug_indent () { - if [ "$verbose" = 'yes' ]; then - printf '[ debug ] %s\n' "$1" 1>&2 - fi -} - ensure_nonempty () { - [ "$2" ] || fatal "$1 must not be empty" -} - -fatal () { - printf 'error: ch-fromhost: %s\n' "$1" 1>&2 - exit 1 -} - -info () { - printf 'ch-fromhost: %s\n' "$1" 1>&2 + [ "$2" ] || FATAL -- "$1 must not be empty" } is_bin () { @@ -144,27 +124,27 @@ old_ifs="$IFS" IFS="$newline" d="${dest:-$2}" - debug "enqueue file(s)" + VERBOSE "enqueue file(s)" for f in $1; do case $f in *:*) - fatal "paths can't contain colon: ${f}" + FATAL "paths can't contain colon: ${f}" ;; esac if is_so "$f"; then case $f in *libfabric.so) if ldd "$f" | grep libcxi > /dev/null 2>&1; then - debug_indent "cray libfabric: ${f}" + DEBUG "cray libfabric: ${f}" cray_fi_found=yes host_libfabric=$f else - debug_indent "libfabric: ${f}" + DEBUG "libfabric: ${f}" lib_found=yes fi ;; *-fi.so) - debug_indent "libfabric shared provider: ${f}" + DEBUG "libfabric shared provider: ${f}" fi_prov_found=yes # Providers, like Cray's libgnix-fi.so, link against paths that # need to be bind-mounted at run-time. Some of these paths need @@ -175,13 +155,14 @@ ld=$(dirname "$(readlink -f "$l")") # Avoid duplicates and host libfabric.so. if [ "$(echo "$ld_conf" | grep -c "$ld")" -eq 0 ] \ - && [ "$(echo "$ld" | grep -c "libfabric.so")" -eq 0 ]; then + && [ "$(echo "$ld" | grep -c "libfabric.so")" -eq 0 ]; \ + then enqueue_ldconf "$ld" fi done ;; *) - debug_indent "shared library: ${f}" + DEBUG "shared library: ${f}" lib_found=yes ;; esac @@ -223,108 +204,109 @@ fi } -parse_basic_args "$@" +if [ "$#" -eq 0 ]; then + usage 1 +fi while [ $# -gt 0 ]; do opt=$1; shift - case $opt in - -c|--cmd) - ensure_nonempty --cmd "$1" - out=$($1) || fatal "command failed: $1" - enqueue_file "$out" - shift - ;; - --cray-cxi) - warn_fi_var - if [ -z "$CH_FROMHOST_OFI_CXI" ]; then - fatal "CH_FROMHOST_OFI_CXI is not set" - fi - enqueue_file "$CH_FROMHOST_OFI_CXI" - ;; - --cray-gni) - warn_fi_var - if [ -z "$CH_FROMHOST_OFI_GNI" ]; then - fatal "CH_FROMHOST_OFI_GNI is not set" - fi - enqueue_file "$CH_FROMHOST_OFI_GNI" - ;; - -d|--dest) - ensure_nonempty --dest "$1" - dest=$1 - shift - ;; - -f|--file) - ensure_nonempty --file "$1" - out=$(cat "$1") || fatal "cannot read file: ${1}" - enqueue_file "$out" - shift - ;; - # Note: Specifying any of the --print-* options along with one of the - # file specification options will result in all the file gathering and - # checking work being discarded. 
- --print-cray-fi) - cray_fi_found=yes - print_cray_fi_dest=yes - ;; - --print-fi) - fi_prov_found=yes - print_fi_dest=yes - ;; - --print-lib) - lib_found=yes - print_lib_dest=yes - ;; - --no-ldconfig) - no_ldconfig=yes - ;; - --nvidia) - out=$(nvidia-container-cli list --binaries --libraries) \ - || fatal "nvidia-container-cli failed; does this host have GPUs?" - enqueue_file "$out" - ;; - -p|--path) - ensure_nonempty --path "$1" - enqueue_file "$1" - shift - ;; - -v|--verbose) - verbose=yes - ;; - -*) - info "invalid option: ${opt}" - usage - ;; - *) - ensure_nonempty "image path" "${opt}" - [ -z "$image" ] || fatal "duplicate image: ${opt}" - [ -d "$opt" ] || fatal "image not a directory: ${opt}" - image="$opt" - ;; - esac + if ! parse_basic_arg "$opt"; then + case $opt in + -c|--cmd) + ensure_nonempty --cmd "$1" + out=$($1) || FATAL "command failed: $1" + enqueue_file "$out" + shift + ;; + --cray-cxi) + warn_fi_var + if [ -z "$CH_FROMHOST_OFI_CXI" ]; then + FATAL "CH_FROMHOST_OFI_CXI is not set" + fi + enqueue_file "$CH_FROMHOST_OFI_CXI" + ;; + --cray-gni) + warn_fi_var + if [ -z "$CH_FROMHOST_OFI_GNI" ]; then + FATAL "CH_FROMHOST_OFI_GNI is not set" + fi + enqueue_file "$CH_FROMHOST_OFI_GNI" + ;; + -d|--dest) + ensure_nonempty --dest "$1" + dest=$1 + shift + ;; + -f|--file) + ensure_nonempty --file "$1" + out=$(cat "$1") || FATAL "cannot read file: ${1}" + enqueue_file "$out" + shift + ;; + # Note: Specifying any of the --print-* options along with one of + # the file specification options will result in all the file + # gathering and checking work being discarded. + --print-cray-fi) + cray_fi_found=yes + print_cray_fi_dest=yes + ;; + --print-fi) + fi_prov_found=yes + print_fi_dest=yes + ;; + --print-lib) + lib_found=yes + print_lib_dest=yes + ;; + --no-ldconfig) + no_ldconfig=yes + ;; + --nvidia) + out=$(nvidia-container-cli list --binaries --libraries) \ + || FATAL "nvidia-container-cli failed; does this host have GPUs?" + enqueue_file "$out" + ;; + -p|--path) + ensure_nonempty --path "$1" + enqueue_file "$1" + shift + ;; + -*) + INFO "invalid option: ${opt}" + usage + ;; + *) + ensure_nonempty "image path" "${opt}" + [ -z "$image" ] || FATAL "duplicate image: ${opt}" + [ -d "$opt" ] || FATAL "image not a directory: ${opt}" + image="$opt" + ;; + esac + fi done if [ -n "$FI_PROVIDER_PATH" ] && [ -n "$fi_prov_found" ] && [ -z "$dest" ]; then - fatal "FI_PROVIDER_PATH set; missing --dest" + FATAL "FI_PROVIDER_PATH set; missing --dest" fi -[ "$image" ] || fatal "no image specified" +[ "$image" ] || FATAL "no image specified" if [ -n "$cray_fi_found" ]; then # There is no Slingshot provider CXI; to leverage slingshot we need to # replace the image libfabric.so with Cray's. - debug "searching image for inferred libfabric destination" + VERBOSE "searching image for inferred libfabric destination" img_libfabric=$(find "$image" -name "libfabric.so") - [ -n "$img_libfabric" ] || fatal "libfabric.so not found in $image" - debug_indent "found $img_libfabric" + [ -n "$img_libfabric" ] || FATAL "libfabric.so not found in $image" + DEBUG "found $img_libfabric" if [ "$(echo "$img_libfabric" | wc -l)" -ne 1 ]; then warn 'found more than one libfabric.so' fi img_libfabric_path=$(echo "$img_libfabric" | sed "s@$image@@") cray_fi_dest=$(dirname "/$img_libfabric_path") - # Since cray's libfabric isn't a standard provider, to use slingshot we must - # also add any missing linked libraries from the host. - debug "adding cray libfabric libraries" + # Since cray's libfabric isn't a standard provider, to use slingshot we + # must also add any missing linked libraries from the host. + VERBOSE "adding cray libfabric libraries" ldds=$(ldd "$host_libfabric" 2>&1 | grep lib | awk '{print $3}' | sort -u) for l in $ldds; do # Do not replace any libraries found in the image, experimentation has @@ -332,7 +314,7 @@ # both MPICH and OpenMPI examples work with this conservative approach. file_found=$(find "${image}" -name "$(basename "$l")") if [ -n "$file_found" ]; then - debug_indent "skipping $l" + DEBUG "skipping $l" continue fi enqueue_file "$l" @@ -348,33 +330,37 @@ # We want to put the libraries in the first directory that ldconfig # searches, so that we can override (or overwrite) any of the same library # that may already be in the image. - debug "asking ldconfig for inferred shared library destination" - # "ldconfig -Nv" gives some pointless warnings on stderr even if - # successful; we don't want to show those to users. However, we don't want - # to simply pipe stderr to /dev/null because this hides real errors. Thus, - # use the following abomination to pipe stdout and stderr to *separate - # grep commands*. See: https://stackoverflow.com/a/31151808 + VERBOSE "asking ldconfig for inferred shared library destination" + # "ldconfig -Nv" gives pointless warnings on stderr even if successful; we + # don't want to show those to users (unless -vv or higher). However, we + # don't want to simply pipe stderr to /dev/null because this hides real + # errors. Thus, use the following abomination to pipe stdout and stderr to + # *separate grep commands*. See: https://stackoverflow.com/a/31151808 + if [ "$log_level" -lt 2 ]; then # VERBOSE or lower + stderr_filter='(^|dynamic linker, ignoring|given more than once|No such file or directory)$' + else # DEBUG or higher + stderr_filter=weird_al_yankovic_will_not_appear_in_ldconfig_output + fi lib_dest=$( { "${ch_bin}/ch-run" "$image" -- /sbin/ldconfig -Nv \ - 2>&1 1>&3 3>&- | grep -Ev '(^|dynamic linker, ignoring|given more than once)$' ; } \ + 2>&1 1>&3 3>&- | grep -Ev "$stderr_filter" ; } \ 3>&1 1>&2 | grep -E '^/' | cut -d: -f1 | head -1 ) - [ -n "$lib_dest" ] || fatal 'empty path from ldconfig' - [ -z "${lib_dest%%/*}" ] || fatal "bad path from ldconfig: ${lib_dest}" - debug "inferred shared library destination: ${image}/${lib_dest}" + [ -n "$lib_dest" ] || FATAL 'empty path from ldconfig' + [ -z "${lib_dest%%/*}" ] || FATAL "bad path from ldconfig: ${lib_dest}" + VERBOSE "inferred shared library destination: ${image}/${lib_dest}" fi if [ -n "$fi_prov_found" ]; then # The libfabric provider can be specified with FI_PROVIDER. The path the # search for shared providers at can be specified with FI_PROVIDER_PATH # (undocumented). This complicates the inferred destination because these - # variables can be inherited from the host or explicitly set in the image's - # /ch/environment - # file. + # variables can be inherited from the host or explicitly set in the + # image's /ch/environment file. # # For simplicity, the inferred injection destination is always the - 'libfabric' directory at the path where libfabric.so is found. If it does - not exist, create it. Warn if FI_PROVIDER_PATH or FI_PROVIDER is found - in the the image's /ch/environment file. - debug "searching ${image} for libfabric shared provider destination" + 'libfabric' directory at the path where libfabric.so is found. If it + does not exist, create it. Warn if FI_PROVIDER_PATH or FI_PROVIDER is + found in the image's /ch/environment file. + VERBOSE "searching ${image} for libfabric shared provider destination" ch_env_p=$(grep -E '^FI_PROVIDER_PATH=' "${image}/ch/environment") \ || true # avoid -e exit ch_env_p=${ch_env_p##*=} @@ -383,11 +369,11 @@ fi img_libfabric=$(find "$image" -name 'libfabric.so') img_libfabric_path=$(echo "$img_libfabric" | sed "s@$image@@") - debug_indent "found: ${image}${img_libfabric_path}" + DEBUG "found: ${image}${img_libfabric_path}" fi_prov_dest=$(dirname "/${img_libfabric_path}") fi_prov_dest="${fi_prov_dest}/libfabric" queue_mkdir "$fi_prov_dest" - debug "inferred provider destination: $fi_prov_dest" + VERBOSE "inferred provider destination: $fi_prov_dest" fi if [ -n "$print_lib_dest" ]; then @@ -409,22 +395,22 @@ queue_mkdir /var/lib/hugetlbfs # UGNI if [ ! -L /etc/opt/cray/release/cle-release ]; then - # ALPS libraries require the contents of this directory to be present at - # the same path as the host. Create the mount point here, then ch-run - # bind-mounts it later. + # ALPS libraries require the contents of this directory to be present + # at the same path as the host. Create the mount point here, then + # ch-run bind-mounts it later. queue_mkdir /var/opt/cray/alps/spool - # The cray-ugni provider will link against cray's libwlm_detect so. Create - # the mount point for ch-run. + # The cray-ugni provider will link against cray’s libwlm_detect.so. + # Create the mount point for ch-run. queue_mkdir /opt/cray/wlm_detect - # libwlm_detect.so requires file(s) to present at the same path as the host. - # Create mount point for ch-run. + # libwlm_detect.so requires file(s) to be present at the same path as + # the host. Create mount point for ch-run. queue_mkdir /etc/opt/cray/wlm_detect - # OFI uGNI provider, libgnix-fi.so, links against the Cray host's - # libxpmem, libudreg, libalpsutil, libalpslli, and libugni; create mount - # points for ch-run to use later. + # OFI uGNI provider, libgnix-fi.so, links against the Cray host’s + # libxpmem, libudreg, libalpsutil, libalpslli, and libugni; create + # mount points for ch-run to use later. queue_mkdir /opt/cray/udreg queue_mkdir /opt/cray/xpmem queue_mkdir /opt/cray/ugni @@ -432,47 +418,48 @@ fi # CXI (slingshot) if [ -f /opt/cray/etc/release/cos ]; then - # Newer Cray Shasta environments require the contents of this directory - # to be present at the same path as the host. Create mount points for - # ch-run to use later. + # Newer Cray Shasta environments require the contents of this + # directory to be present at the same path as the host. Create mount + # points for ch-run to use later. queue_mkdir /var/spool/slurmd fi fi -[ "$inject_files" ] || fatal "empty file list" +[ "$inject_files" ] || FATAL "empty file list" -debug "injecting into image: ${image}" +VERBOSE "injecting into image: ${image}" old_ifs="$IFS" IFS="$newline" # Process unlink list. for u in $inject_unlinks; do - debug_indent "rm -f ${image}${u}" + DEBUG "deleting: ${image}${u}" rm -f "${image}${u}" done # Process bind-mount destination targets. for d in $inject_mkdirs; do - debug_indent "mkdir -p ${image}${d}" + DEBUG "mkdir: ${image}${d}" mkdir -p "${image}${d}" done # Process ldconfig targets. if [ "$fi_prov_found" ] || [ "$cray_fi_found" ]; then if [ ! -f "${image}/etc/ld.so.conf" ]; then - debug_indent "touch ${image}/etc/ld.so.conf" + DEBUG "creating empty ld.so.conf" touch "${image}/etc/ld.so.conf" fi - if !
grep -F 'include ld.so.conf.d/*.conf' "${image}/etc/ld.so.conf" > /dev/null 2>&1; then - debug_indent "echo 'include ld.so.conf.d/*.conf' >> ${image}/etc/ld.so.conf" + if ! grep -F 'include ld.so.conf.d/*.conf' "${image}/etc/ld.so.conf" \ + > /dev/null 2>&1; then + DEBUG "ld.so.conf: adding 'include ld.so.conf.d/*.conf'" echo 'include ld.so.conf.d/*.conf' >> "${image}/etc/ld.so.conf" fi # Prepare image ch-ofi.conf. printf '' > "${image}/etc/ld.so.conf.d/ch-ofi.conf" # add ofi dso provider ld library dirs. for c in $ld_conf; do - debug_indent "echo '$c' >> ${image}/etc/ld.so.conf.d/ch-ofi.conf" + DEBUG "ld.so.conf: adding ${c}" echo "$c" >> "${image}/etc/ld.so.conf.d/ch-ofi.conf" done fi @@ -491,55 +478,58 @@ if ldd "$f" | grep libcxi > /dev/null 2>&1; then d=$cray_fi_dest fi - ;; + ;; *-fi.so) d=$fi_prov_dest - ;; + ;; *) d=$lib_dest - ;; + ;; esac infer=" (inferred)" fi - debug_indent "${f} -> ${d}${infer}" - [ "$d" ] || fatal "no destination for: ${f}" - [ -z "${d%%/*}" ] || fatal "not an absolute path: ${d}" - [ -d "${image}${d}" ] || fatal "not a directory: ${image}${d}" + VERBOSE "${f} -> ${d}${infer}" + [ "$d" ] || FATAL "no destination for: ${f}" + [ -z "${d%%/*}" ] || FATAL "not an absolute path: ${d}" + [ -d "${image}${d}" ] || FATAL "not a directory: ${image}${d}" if [ ! -w "${image}${d}" ]; then # Some images unpack with unwriteable directories; fix. This seems # like a bit of a kludge to me, so I'd like to remove this special # case in the future if possible. (#323) - info "${image}${d} not writeable; fixing" - chmod u+w "${image}${d}" || fatal "can't chmod u+w: ${image}${d}" + INFO "${image}${d} not writeable; fixing" + chmod u+w "${image}${d}" || FATAL "can't chmod u+w: ${image}${d}" fi cp --dereference --preserve=all "$f" "${image}${d}" \ - || fatal "cannot inject: ${f}" + || FATAL "cannot inject: ${f}" done IFS="$old_ifs" if [ -z "$no_ldconfig" ] \ - && { [ "$lib_found" ] || [ "$fi_prov_found" ] || [ "$cray_fi_found" ] ;} then - debug "running ldconfig" - debug_indent "${ch_bin}/ch-run -w $image -- /sbin/ldconfig" - "${ch_bin}/ch-run" -w "$image" -- /sbin/ldconfig 2> /dev/null || fatal 'ldconfig error' + && { [ "$lib_found" ] \ + || [ "$fi_prov_found" ] \ + || [ "$cray_fi_found" ] ;} then + VERBOSE "running ldconfig" + "${ch_bin}/ch-run" -w "$image" -- /sbin/ldconfig 2> /dev/null \ + || FATAL 'ldconfig error' if [ -n "$fi_prov_found" ] || [ -n "$cray_fi_found" ]; then - debug "validating ldconfig cache" + VERBOSE "validating ldconfig cache" for file in $inject_files; do f="$(basename "${file%%:*}")" - f=$("${ch_bin}/ch-run" "$image" -- find / \ - -not \( -path /proc -prune \) \ - -not \( -path /dev -prune \) \ - -not \( -path /tmp -prune \) \ - -not \( -path /sys -prune \) \ - -not \( -path /var/opt/cray -prune \) \ - -not \( -path /etc/opt/cray -prune \) \ - -name "$f") + f=$( "${ch_bin}/ch-run" "$image" \ + -- find / \ + -not \( -path /proc -prune \) \ + -not \( -path /dev -prune \) \ + -not \( -path /tmp -prune \) \ + -not \( -path /sys -prune \) \ + -not \( -path /var/opt/cray -prune \) \ + -not \( -path /etc/opt/cray -prune \) \ + -name "$f") if [ "$("${ch_bin}/ch-run" "$image" -- ldd "$f" | grep -c 'not found ')" -ne 0 ]; then - fatal "ldconfig: '${ch_bin}/ch-run $image -- ldd $f' failed" + FATAL "ldconfig: '${ch_bin}/ch-run $image -- ldd $f' failed" fi done fi else - debug "not running ldconfig" + VERBOSE "not running ldconfig" fi echo 'done' diff -Nru charliecloud-0.36/bin/ch-image.py.in charliecloud-0.37/bin/ch-image.py.in --- charliecloud-0.36/bin/ch-image.py.in 
2024-01-19 20:44:04.000000000 +0000 +++ charliecloud-0.37/bin/ch-image.py.in 2024-02-29 20:52:27.000000000 +0000 @@ -1,17 +1,19 @@ #!%PYTHON_SHEBANG% import argparse +import ast +import collections.abc import inspect import os.path import sys ch_lib = os.path.dirname(os.path.abspath(__file__)) + "/../lib" sys.path.insert(0, ch_lib) -import build_cache as bu import charliecloud as ch import build -import misc +import build_cache as bu import filesystem as fs +import misc import pull import push @@ -107,6 +109,9 @@ [["--auth"], { "action": "store_true", "help": "authenticated registry access; implied by push" }], + [["--break"], + { "metavar": "MODULE:LINE", + "help": "break into PDB before LINE of MODULE" }], [["--cache-large"], { "metavar": "SIZE", "type": lambda s: ch.positive(s) * 2**20, # internal unit: bytes @@ -328,11 +333,100 @@ ch.exit(0) +## Functions ## + +def breakpoint_inject(module_name, line_no): + # Inject a PDB breakpoint into the module named module_name before the + # statement on line line_no. See: https://stackoverflow.com/a/41858422 + + class PDB_Injector(ast.NodeTransformer): + def __init__(self, *args, **kwargs): + self.inject_ct = 0 + return super().__init__(*args, **kwargs) + def generic_visit(self, parent): + # Operate on parent of target statement because we need to inject the + # new code into the parent’s body (i.e., as siblings of the target + # statement). + if ( self.inject_ct == 0 + and hasattr(parent, "body") + and isinstance(parent.body, collections.abc.Sequence)): + for (i, child) in enumerate(parent.body): + if ( isinstance(child, ast.stmt) + and hasattr(child, "lineno") + and child.lineno == line_no): + ch.WARNING( "--break: injecting PDB breakpoint: %s:%d (%s)" + % (module_name, line_no, type(child).__name__)) + parent.body[i:i] = inject_tree.body + self.inject_ct += 1 + break + super().generic_visit(parent) # superclass actually visits children + return parent + + if (module_name not in sys.modules): + ch.FATAL("--break: no module named %s" % module_name) + module = sys.modules[module_name] + src_text = inspect.getsource(module) + src_path = inspect.getsourcefile(module) + module_tree = ast.parse(src_text, "%s " % src_path) + inject_tree = ast.parse("import pdb; pdb.set_trace()", "Weird Al Yankovic") + + ijor = PDB_Injector() + ijor.visit(module_tree) # calls generic_visit() on all nodes + if (ijor.inject_ct < 1): + ch.FATAL("--break: no statement found at %s:%d" % (module_name, line_no)) + assert (ijor.inject_ct == 1) + + ast.fix_missing_locations(module_tree) + exec(compile(module_tree, "%s " % src_path, "exec"), + module.__dict__) + # Set a global in the target module so it can test if it’s been + # re-executed. This means re-execution is *complete*, so it will not be set + # in module-level code run during re-execution, but if the original + # execution continues *after* re-execution completes (this happens for + # __main__), it *will* be set in that code. + module.__dict__["breakpoint_reexecuted"] = "%s:%d" % (module_name, line_no) + + ## Bootstrap ## +# This code is more complicated than the standard boilerplate (i.e., “if +# (__name__ == "__main__"): main()”) for two reasons: +# +# 1. The mechanism for fatal errors is to raise ch.Fatal_Error. We catch +# this to re-print warnings and print the error message before exiting. +# (We used to print an error message and then sys.exit(1), but this +# approach lets us do things like rollback and fixes ordering problems +# such as #1486.) +# +# 2. There is a big mess of hairy code to let us set PDB breakpoints in this +# file (i.e., module __main__) with --break. See PR #1837. + if (__name__ == "__main__"): try: - main() + # We can’t set these two module globals that support --break normally + # (i.e., module-level code at the top of this file) because this module + # might be executed twice, and thus any value we set would be + # overwritten by the default when the module is re-executed. + if ("breakpoint_considered" not in globals()): + global breakpoint_considered + breakpoint_considered = True + # A few lines of bespoke CLI parsing so that we can inject + # breakpoints into the CLI parsing code itself. + for (opt, arg) in zip(sys.argv[1:], sys.argv[2:] + [None]): + (opt, _, arg_eq) = opt.partition("=") + if (opt == "--break"): + if (arg_eq != ""): + arg = arg_eq + try: + (module_name, line_no) = arg.split(":") + line_no = int(line_no) + except ValueError: + ch.FATAL("--break: can’t parse MODULE:LINE: %s" % arg) + breakpoint_inject(module_name, line_no) + # If we injected into __main__, we already ran main() when re-executing + # this module inside breakpoint_inject(). + if ("breakpoint_reexecuted" not in globals()): + main() except ch.Fatal_Error as x: ch.warnings_dump() ch.ERROR(*x.args, **x.kwargs) diff -Nru charliecloud-0.36/bin/ch-test charliecloud-0.37/bin/ch-test --- charliecloud-0.36/bin/ch-test 2024-01-19 20:44:04.000000000 +0000 +++ charliecloud-0.37/bin/ch-test 2024-02-29 20:52:27.000000000 +0000 @@ -363,6 +363,7 @@ ;; *) fatal "scope '$scope' invalid" + ;; esac } @@ -516,9 +517,12 @@ builder_exclude=$(cat "$img" | grep -F 'ch-test-builder-exclude: ' \ | sed 's/.*: //' | awk '{print $1}') - img_scope_int=$(scope_to_digit "$(cat "$img" | grep -F 'ch-test-scope' \ - | sed 's/.*: //' \ - | awk '{print $1}')") + img_scope_str=$(cat "$img" | grep -F 'ch-test-scope' \ + | sed 's/.*: //' \ + | awk '{print $1}') + [[ -n $img_scope_str ]] || fatal "no scope: $img" + img_scope_int=$(scope_to_digit "$img_scope_str") + [[ -n $img_scope_int ]] || exit 1 # set -e not working, why? sudo_required=$(cat "$img" | grep -F 'ch-test-need-sudo') diff -Nru charliecloud-0.36/configure.ac charliecloud-0.37/configure.ac --- charliecloud-0.36/configure.ac 2024-01-19 20:44:04.000000000 +0000 +++ charliecloud-0.37/configure.ac 2024-02-29 20:52:27.000000000 +0000 @@ -63,12 +63,6 @@ AM_INIT_AUTOMAKE([1.13 -Wall -Werror foreign subdir-objects]) -# Check for “pkg-config”. It’s here because we use PKG_CHECK_MODULES -# conditionally later and we want to make sure this always happens [1, §3.4]. -# -# [1]: https://autotools.info/pkgconfig/pkg_check_modules.html -PKG_PROG_PKG_CONFIG - AC_CONFIG_HEADERS([bin/config.h]) AC_CONFIG_FILES([Makefile bin/Makefile @@ -491,17 +485,22 @@ have_libsquashfuse_ll=n/a have_ll_h=n/a AS_IF([test $want_libsquashfuse = yes], [ - # libfuse3. Must use pkg-config because as of version 0.5.0 SquashFUSE’s - # ll.h won’t build without an appropriate -I [1]. This macro defines some - # variables that we use here; see this third-party documentation [2]. (I - # could not find first-party docs for it.) + # libfuse3. As of version 0.5.0, SquashFUSE’s ll.h won’t build without an + # appropriate -I [1]. Presently we use pkg-config to find it, but see #1844. + # + # We avoid PKG_CHECK_MODULES because it introduces a dependency on + # pkg-config at autogen.sh time, with impressively incomprehensible error + # messages if it’s not met [2]. The approach below also seems simpler [3]?
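+ # (For illustration only — the checks below amount to roughly this shell logic: “if pkg-config --exists fuse3; then CFLAGS="$CFLAGS $(pkg-config --cflags fuse3)"; fi”, with a hard error if pkg-config itself is missing.)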
# # [1]: https://github.com/vasi/squashfuse/commit/eca5764 - # [2]: https://autotools.info/pkgconfig/pkg_check_modules.html - PKG_CHECK_MODULES([fuse3], [fuse3], [ - # libfuse3 found + # [2]: https://ae1020.github.io/undefined-macro-pkg-config/ + # [3]: https://tirania.org/blog/archive/2012/Oct-20.html + AC_CHECK_PROG(have_pkg_config, pkg-config, yes, no) + AS_IF([test $have_pkg_config != yes], + [AC_MSG_ERROR([need pkg-config to find libfuse3; try --with-libsquashfuse=no or see issue @%:@1844])]) + AS_IF([pkg-config --exists fuse3], [ have_libfuse3=yes - CFLAGS="$CFLAGS $fuse3_CFLAGS" + CFLAGS="$CFLAGS $(pkg-config --cflags fuse3)" # libsquashfuse? AC_CHECK_LIB([squashfuse_ll], [sqfs_ll_mount], [have_libsquashfuse_ll=yes], diff -Nru charliecloud-0.36/debian/changelog charliecloud-0.37/debian/changelog --- charliecloud-0.36/debian/changelog 2024-02-03 20:52:40.000000000 +0000 +++ charliecloud-0.37/debian/changelog 2024-03-07 19:06:12.000000000 +0000 @@ -1,3 +1,13 @@ +charliecloud (0.37-1) unstable; urgency=medium + + * New upstream version 0.37 (Closes: #1063467) + * Add new man page for ch-completion.bash + * Replace obsolete build dependency pkg-config by pkgconf + * Run reprotest with diffoscope + * Add patch from upstream PR 1856 to fix reproducibility issue + + -- Peter Wienemann Thu, 07 Mar 2024 20:06:12 +0100 + charliecloud (0.36-1) unstable; urgency=medium * New upstream version 0.36 diff -Nru charliecloud-0.36/debian/charliecloud-common.manpages charliecloud-0.37/debian/charliecloud-common.manpages --- charliecloud-0.36/debian/charliecloud-common.manpages 2024-02-03 20:52:40.000000000 +0000 +++ charliecloud-0.37/debian/charliecloud-common.manpages 2024-03-07 19:06:12.000000000 +0000 @@ -1 +1,2 @@ debian/tmp/usr/share/man/man7/charliecloud.7 +debian/tmp/usr/share/man/man7/ch-completion.bash.7 diff -Nru charliecloud-0.36/debian/control charliecloud-0.37/debian/control --- charliecloud-0.36/debian/control 2024-02-03 20:52:40.000000000 +0000 +++ charliecloud-0.37/debian/control 2024-03-07 19:06:12.000000000 +0000 @@ -11,7 +11,7 @@ debhelper-compat (= 13), libfuse3-dev, libsquashfuse-dev, - pkg-config, + pkgconf, po-debconf, python3-sphinx-rtd-theme Standards-Version: 4.6.2 diff -Nru charliecloud-0.36/debian/patches/pr1856.patch charliecloud-0.37/debian/patches/pr1856.patch --- charliecloud-0.36/debian/patches/pr1856.patch 1970-01-01 00:00:00.000000000 +0000 +++ charliecloud-0.37/debian/patches/pr1856.patch 2024-03-07 19:06:12.000000000 +0000 @@ -0,0 +1,23 @@ +From: Peter Wienemann +Date: Thu, 7 Mar 2024 19:02:02 +0100 +Subject: doctest-auto: Ensure locale-independent output + +Forwarded: https://github.com/hpc/charliecloud/pull/1856 +--- + test/doctest-auto | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/test/doctest-auto b/test/doctest-auto +index 9b73659..e9c4c64 100755 +--- a/test/doctest-auto ++++ b/test/doctest-auto +@@ -4,6 +4,9 @@ + + set -e -o pipefail + ++# Ensure reproducible output ++export LC_ALL=C ++ + cat <`_ version 1.96. diff -Nru charliecloud-0.36/doc/best_practices.rst charliecloud-0.37/doc/best_practices.rst --- charliecloud-0.36/doc/best_practices.rst 2024-01-19 20:44:04.000000000 +0000 +++ charliecloud-0.37/doc/best_practices.rst 2024-02-29 20:52:27.000000000 +0000 @@ -182,46 +182,41 @@ ----------------------------------------- Under this method, one uses :code:`RUN` commands to fetch the desired software -using :code:`curl` or :code:`wget`, compile it, and install. Our example does -this with two chained Dockerfiles. 
First, we build a basic AlmaLinux image -(:code:`examples/Dockerfile.almalinux_8ch`): - - .. literalinclude:: ../examples/Dockerfile.almalinux_8ch - :language: docker - :lines: 2- -Then, in a second image (:code:`examples/Dockerfile.openmpi`), we add OpenMPI. -This is a complex Dockerfile that compiles several dependencies in addition to -OpenMPI. For the purposes of this documentation, you can skip most of it, but -we felt it would be useful to show a real example. +using :code:`curl` or :code:`wget`, compile it, and install. Our example +(:code:`examples/Dockerfile.almalinux_8ch`) does this with ImageMagick: -.. literalinclude:: ../examples/Dockerfile.openmpi +.. literalinclude:: ../examples/Dockerfile.almalinux_8ch :language: docker :lines: 2- So what is going on here? -1. Use the latest AlmaLinux 8 as the base image. +#. Use the latest AlmaLinux 8 as the base image. -2. Install a basic build system using the OS package manager. +#. Install some packages using :code:`dnf`, the OS package manager, including + a basic development environment. -3. For a few dependencies and then OpenMPI itself: +#. Install :code:`wheel` using :code:`pip` and adjust the shared library + configuration. (These are not needed for ImageMagick but rather support + derived images.) - 1. Download and untar. Note the use of variables to make adjusting the URL - and versions easier, as well as the explanation of why we’re not using - :code:`dnf`, given that several of these packages are included in - CentOS. +#. For ImageMagick itself: - 2. Build and install OpenMPI. Note the :code:`getconf` trick to guess at an + #. Download and untar. Note the use of the variable :code:`MAGICK_VERSION` + to make adjusting the version easier. + + #. Build and install. Note the :code:`getconf` trick to guess at an appropriate parallel build. -4. Clean up, in order to reduce the size of the build cache as well as the - resulting Charliecloud image (:code:`rm -Rf`). + #. Clean up, in order to reduce the size of the build cache as well as the + resulting Charliecloud image (:code:`rm -Rf`). + +.. note:: -.. Finally, because it’s a container image, you can be less tidy than you - might be on a normal system. For example, the above downloads and builds in - :code:`/` rather than :code:`/usr/local/src`, and it installs MPI into - :code:`/usr` rather than :code:`/usr/local`. + Because it’s a container image, you can be less tidy than you might + normally be. For example, we install ImageMagick directly into + :code:`/usr/local` rather than using something like `GNU Stow + <https://www.gnu.org/software/stow/>`_ to organize this directory tree. Your software stored in the image --------------------------------- @@ -309,4 +304,4 @@ .. LocalWords: userguide Gruening Souppaya Morello Scarfone openmpi nist -.. LocalWords: ident OCFS +.. LocalWords: ident OCFS MAGICK diff -Nru charliecloud-0.36/doc/ch-completion.bash.rst charliecloud-0.37/doc/ch-completion.bash.rst --- charliecloud-0.36/doc/ch-completion.bash.rst 1970-01-01 00:00:00.000000000 +0000 +++ charliecloud-0.37/doc/ch-completion.bash.rst 2024-02-29 20:52:27.000000000 +0000 @@ -0,0 +1,98 @@ +.. _ch-completion.bash: + +:code:`ch-completion.bash` +++++++++++++++++++++++++++ + +.. only:: not man + + Tab completion for the Charliecloud command line. + + +Synopsis +======== + +:: + + $ source ch-completion.bash + + +Description +=========== + +:code:`ch-completion.bash` provides tab completion for the Charliecloud +command line. Currently, tab completion is available for Bash users for the +executables :code:`ch-image`, :code:`ch-run`, and :code:`ch-convert`. + +We do not currently install the file if Charliecloud is built from source (see +`issue #1842 <https://github.com/hpc/charliecloud/issues/1842>`_). In this +case, source it from the Charliecloud source code:: + + $ source $CHARLIECLOUD_SOURCE_PATH/bin/ch-completion.bash + +If you installed with a distribution package, the procedure is probably +nicer; see your distro’s docs. + +Disable completion with the utility function :code:`ch-completion` added to +your environment when the above is sourced:: + + $ ch-completion --disable + + +Dependencies +============ + +Tab completion has these additional dependencies: + +* Bash ≥ 4.3.0 + +* :code:`bash-completion` library (`GitHub + <https://github.com/scop/bash-completion>`_, or it probably comes with your + distribution, `e.g. `_) + + +.. _ch-completion_func: + +:code:`ch-completion` +===================== + +Utility function for :code:`ch-completion.bash`. + +Synopsis +-------- + +:: + + $ ch-completion [ OPTIONS ] + + +Description +----------- + +:code:`ch-completion` is a function to manage Charliecloud’s tab completion. +It is added to the environment when completion is sourced. The option(s) given +specify what to do: + +:code:`--disable` + Disable tab completion for all Charliecloud executables. + +:code:`--help` + Print help message. + +:code:`--version` + Print version of tab completion that’s currently enabled. + +:code:`--version-ok` + Verify that tab completion version is consistent with that of + :code:`ch-image`. + + +Debugging +========= + +Tab completion can write debugging logs to :code:`/tmp/ch-completion.log`. +Enable this by setting the environment variable :code:`CH_COMPLETION_DEBUG`. +(This is primarily intended for developers.) + + +.. LocalWords: func diff -Nru charliecloud-0.36/doc/ch-fromhost.rst charliecloud-0.37/doc/ch-fromhost.rst --- charliecloud-0.36/doc/ch-fromhost.rst 2024-01-19 20:44:04.000000000 +0000 +++ charliecloud-0.37/doc/ch-fromhost.rst 2024-02-29 20:52:27.000000000 +0000 @@ -27,8 +27,8 @@ The purpose of this command is to inject arbitrary host files into a container necessary to access host specific resources; usually GPU or proprietary -interconnets. **It is not a general copy-to-image tool**; see further discussion -on use cases below. +interconnects. **It is not a general copy-to-image tool**; see further +discussion on use cases below. It should be run after :code:`ch-convert` and before :code:`ch-run`. After invocation, the image is no longer portable to other hosts. @@ -91,12 +91,12 @@ :code:`-p`, :code:`--path PATH` Inject the file at :code:`PATH`. - :code:`--cray-mpi-cxi` + :code:`--cray-cxi` Inject cray-libfabric for slingshot. This is equivalent to :code:`--path $CH_FROMHOST_OFI_CXI`, where :code:`$CH_FROMHOST_OFI_CXI` is the path to the Cray host libfabric :code:`libfabric.so`. - :code:`--cray-mpi-gni` + :code:`--cray-gni` Inject cray gemini/aries GNI libfabric provider :code:`libgnix-fi.so`. This is equivalent to :code:`--fi-provider $CH_FROMHOST_OFI_GNI`, where :code:`CH_FROMHOST_OFI_GNI` is the path to the Cray host ugni provider @@ -120,10 +120,13 @@ Additional arguments -------------------- - :code:`--fi-path` - Print the guest destination path for libfabric providers and replacement. + :code:`--print-cray-fi` + Print inferred destination for libfabric replacement. - :code:`--lib-path` + :code:`--print-fi` + Print the guest destination path for libfabric provider(s).
+ + :code:`--print-lib` Print the guest destination path for shared libraries inferred as described above. @@ -135,11 +138,35 @@ Print help and exit. :code:`-v`, :code:`--verbose` - List the injected files. + Be more verbose about what is going on. Can be repeated. :code:`--version` Print version and exit. +.. warning:: + + :code:`ldconfig` often prints scary-looking warnings on stderr even when + everything is going well. By default, we suppress these, but you can see + them with sufficient verbosity. For example:: + + $ ch-fromhost --print-lib /var/tmp/bullseye + /usr/local/lib + $ ch-fromhost -v --print-lib /var/tmp/bullseye + asking ldconfig for inferred shared library destination + inferred shared library destination: /var/tmp/bullseye//usr/local/lib + /usr/local/lib + $ ch-fromhost -v -v --print-lib /var/tmp/bullseye + asking ldconfig for inferred shared library destination + /sbin/ldconfig: Can't stat /usr/local/lib/x86_64-linux-gnu: No such file or directory + /sbin/ldconfig: Path `/lib/x86_64-linux-gnu' given more than once + /sbin/ldconfig: Path `/usr/lib/x86_64-linux-gnu' given more than once + /sbin/ldconfig: /lib/x86_64-linux-gnu/ld-2.31.so is the dynamic linker, ignoring + inferred shared library destination: /var/tmp/bullseye//usr/local/lib + /usr/local/lib + + See `issue #732 <https://github.com/hpc/charliecloud/issues/732>`_ for an + example of how this was confusing for users. + When to use :code:`ch-fromhost` =============================== diff -Nru charliecloud-0.36/doc/ch-image.rst charliecloud-0.37/doc/ch-image.rst --- charliecloud-0.36/doc/ch-image.rst 2024-01-19 20:44:04.000000000 +0000 +++ charliecloud-0.37/doc/ch-image.rst 2024-02-29 20:52:27.000000000 +0000 @@ -66,6 +66,23 @@ default is to never authenticate, i.e., make all requests anonymously. The exception is :code:`push`, which implies :code:`--auth`. + :code:`--break MODULE:LINE` + Set a `PDB <https://docs.python.org/3/library/pdb.html>`_ breakpoint at + line number :code:`LINE` of module named :code:`MODULE` (typically the + filename with :code:`.py` removed, or :code:`__main__` for + :code:`ch-image` itself). That is, a PDB debugger shell will open before + executing the specified line. + + This is accomplished by re-parsing the module, injecting :code:`import + pdb; pdb.set_trace()` into the parse tree, re-compiling the tree, and + replacing the module’s code with the result. This has various gotchas, + including (1) module-level code in the target module is executed twice, + (2) the option is parsed with bespoke early code so command line argument + parsing itself can be debugged, (3) breakpoints on function definition + will trigger while the module is being re-executed, not when the function + is called (break on the first line of the function body instead), and + (4) other weirdness we haven’t yet characterized. + :code:`--cache` Enable build cache. Default if a sufficiently new Git is available. See section :ref:`Build cache ` for details. @@ -2118,4 +2135,4 @@ .. LocalWords: dlcache graphviz packfile packfiles bigFileThreshold fd Tpdf .. LocalWords: pstats gprof chofile cffd cacdb ARGs NSYNC dst imgroot popt .. LocalWords: globbed ni AHSXpr drwxrwx ctx sym nom newB newC newD dstC -.. LocalWords: dstB dstF dstG upover drwx kexec +..
LocalWords: dstB dstF dstG upover drwx kexec pdb diff -Nru charliecloud-0.36/doc/charliecloud.rst charliecloud-0.37/doc/charliecloud.rst --- charliecloud-0.36/doc/charliecloud.rst 2024-01-19 20:44:04.000000000 +0000 +++ charliecloud-0.37/doc/charliecloud.rst 2024-02-29 20:52:27.000000000 +0000 @@ -11,6 +11,7 @@ -------- ch-checkns(1), +ch-completion.bash(7), ch-convert(1), ch-fromhost(1), ch-image(1), diff -Nru charliecloud-0.36/doc/conf.py charliecloud-0.37/doc/conf.py --- charliecloud-0.36/doc/conf.py 2024-01-19 20:44:04.000000000 +0000 +++ charliecloud-0.37/doc/conf.py 2024-02-29 20:52:27.000000000 +0000 @@ -279,6 +279,9 @@ ("ch-checkns", "ch-checkns", 'Check "ch-run" prerequisites, e.g., namespaces and "pivot_root(2)"', [], 1), + ("ch-completion.bash", "ch-completion.bash", + 'Tab completion for the Charliecloud command line', + [], 7), ("ch-convert", "ch-convert", 'Convert an image from one format to another', [], 1), diff -Nru charliecloud-0.36/doc/dev.rst charliecloud-0.37/doc/dev.rst --- charliecloud-0.36/doc/dev.rst 2024-01-19 20:44:04.000000000 +0000 +++ charliecloud-0.37/doc/dev.rst 2024-02-29 20:52:27.000000000 +0000 @@ -13,10 +13,10 @@ .. note:: - We’re interested in and will consider all good-faith contributions. While + We are interested in and will consider all good-faith contributions. While it does make things easier and faster if you follow the guidelines here, - they are not required. We’ll either clean it up for you or walk you through - any necessary changes. + *they are not required*. We’ll either clean it up for you or walk you + through any necessary changes. Workflow @@ -24,7 +24,7 @@ We try to keep procedures and the Git branching model simple. Right now, we’re pretty similar to Scott Chacon’s “`GitHub Flow -`_”: Master is stable; +`_”: Master is stable; work on short-lived topic branches; use pull requests to ask for merging; keep issues organized with tags and milestones. The standard workflow is: @@ -42,89 +42,111 @@ 6. Review/iterate. - 7. Project lead merges. + 7. Project lead merges with “squash and merge”. -Core team members may deliberate in public on GitHub or internally, whichever -they are comfortable with, making sure to follow LANL policy and taking into -account the probable desires of the recipient as well. +Code review +----------- -Milestones ----------- +**Issues and pull requests.** The typical workflow is: -We use milestones to organize what we plan to do next and what happened in a -given release. There are two groups of milestones: +#. Propose a change in an issue. -* :code:`next` contains the issues that we plan to complete soon but have not - yet landed on a specific release. Generally, we avoid putting PRs in here - because of their ticking clocks. - -* Each release has a milestone. These are dated with the target date for that - release. We put an issue in when it has actually landed in that release or - we are willing to delay that release until it does. We put a PR in when we - think it’s reasonably likely to be merged for that release. - -If an issue is assigned to a person, that means they are actively leading the -work on it or will do so in the near future. Typically this happens when the -issue ends up in :code:`next`. Issues in a status of "I’ll get to this later" -should not be assigned to a person. +#. Get consensus on what to do, whether in the issue or elsewhere. -Peer review ------------ +#. Create a `pull request + `_ (PR) + for the implementation. + +#. Iterate the PR until consensus is reached to either merge or abandon. 
-**Issues and pull requests.** The standard workflow is to introduce a change -in an issue, get consensus on what to do, and then create a *draft* `pull -request `_ -(PR) for the implementation. +#. Merge or close the PR accordingly. The issue, not the PR, should be tagged and milestoned so a given change shows up only once in the various views. -If consensus is obtained through other means (e.g., in-person discussion), -then open a PR directly. In this case, the PR should be tagged and milestoned, -since there is no issue. +GitHub PRs have two states, which are often poorly labeled. These states and +our interpretations are: -**Address a single concern.** When possible, issues and PRs should address +* *Ready for review* (the green *Create pull request* button). This means that + the PR is ready to be merged once tests and code review pass. In-progress + PRs headed in that direction should also be in this state (i.e., the trigger + for review and possible merge is the review request, not a draft to + ready-for-review transition). + +* *Draft*. This means not ready for merge even if tests and review pass. + (GitLab would indicate this with a :code:`WIP:` prefix in the title.) + +**Stand-alone PRs.** If consensus is obtained through other means, e.g. +out-of-band discussion, then a stand-alone PR is appropriate (i.e., don’t +create an issue just for the sake of having an issue to link to a PR). A +stand-alone PR should be tagged and milestoned, since there is no issue. Note +that stand-alone PRs are generally not a good way to *propose* something. + +**Address a single concern.** When practical, issues and PRs should address completely one self-contained change. If there are multiple concerns, make separate issues and/or PRs. For example, PRs should not tidy unrelated code, and non-essential complications should be split into a follow-on issue. +However, sometimes one PR addresses several related issues, which is fine. **Documentation and tests first.** The best practice for significant changes is to draft documentation and/or tests first, get feedback on that, and then implement the code. Reviews of the form "you need a completely different approach" are no fun. -**Tests must pass.** PRs will not be merged until they pass the tests. While -this most saliently includes CI, the tests should also pass on your -development box as well as all relevant clusters (if appropriate for the -changes). +**CI must pass.** PRs will usually not be merged until they pass CI, with +exceptions if the failures are clearly unconnected and we are confident they +aren’t masking a real issue. If appropriate, tests should also pass on +relevant supercomputers. -**No close keywords in PRs.** While GitHub will interpret issue-closing -keywords (variations on `"closes", "fixes", and "resolves" +**Use close keywords in PRs.** Use the issue-closing keywords (variations on +`"closes", "fixes", and "resolves" `_) in PR -descriptions, don’t use this feature, because often the specific issues a PR -closes change over time, and we don’t want to have to edit the description to -deal with that. We also want this information in only one place (the commit -log). Instead, use “addresses”, and we’ll edit the keywords into the commit -message(s) at merge time if needed. - -**PR review procedure.** When your draft PR is ready for review — which may or -may not be when you want it considered for merging! — do one or both of: - -* Request review from the person(s) you want to look at it. 
If you think it may be ready for merge, that should include the project lead. The purpose of requesting review is so the person is notified you need their help.
-
-* If you think it may be ready to merge (even if you’re not sure), then also
-  mark the PR "ready to review". The purpose of this is so the project lead
-  can see which PRs are ready to consider for merging (green icon) and which
-  are not (gray icon). If the project lead decides it’s ready, they will
-  merge; otherwise, they’ll change it back to draft.
+descriptions to link it to the relevant issue(s). If this changes, edit the
+description to add/remove issues.
+
+**PR review procedure.** When your PR is ready for review — which may or may
+not be when you want it considered for merging! — do this:
+
+#. Request review from the person(s) you want to look at it. The purpose of
+   requesting review is so the person is notified you need their help.
+
+#. If you think it’s ready to merge (even if you’re not sure), ensure the PR
+   is (1) marked “ready for review” (green icon), and (2) the project lead is
+   included in your review request.
 
 In both cases, the person from whom you requested review now owns the branch,
-and you should stop work on it unless and until you get it back.
+and you should stop work on it unless and until you get it back (modulo other
+communication, of course). This is so they can make tidy commits if needed
+without collision.
+
+It is good practice to communicate with your reviewer directly to set
+expectations on review urgency.
+
+Review outcomes:
+
+* *Request changes*: The reviewer believes there are changes needed, *and* the
+  PR needs re-review after these are done.
+
+* *Comment*: The reviewer has questions or comments, *and* the PR needs
+  re-review after these are addressed.
+
+* *Approve*: The reviewer believes the branch is ready to proceed (further
+  work if draft, merging if ready for review). Importantly, the review can
+  include comments/questions/changes *but* the reviewer believes these don’t
+  need re-review (i.e., the PR author can deal with them independently).
+
+*Use multi-comment reviews.* Review comments should all be packaged up into a
+single review; click *Start a review* rather than *Add single comment*. Then
+the PR author gets only a single notification instead of one for every comment
+you make, and it’s clear when the branch is theirs again.
 
-Do not hesitate to pester your reviewer if you haven’t heard back promptly,
-say within 24 hours.
+*Selecting a reviewer.* Generally, you want to find a reviewer with time to do
+the review and appropriate expertise. Feel free to ask if you’re not sure.
+Note that the project lead must approve any PRs before merge, so they are
+typically a reasonable choice if you don’t have someone else in mind.
+
+External contributors do not need to select a reviewer. The team will notice
+the PR and wrangle its review.
 
 *Special case 1:* Often, the review consists of code changes, and the reviewer
 will want you to assess those changes. GitHub doesn’t let you request review
@@ -135,45 +157,51 @@
 so this needs to be done with a comment too. Generally you should ask the
 original bug reporter to review, to make sure it solves their problem.
 
-**Use multi-comment reviews.** Review comments should all be packaged up into
-a single review; click *Start a review* rather than *Add single comment*. Then
-the PR author gets only a single notification instead of one for every comment
-you make, and it’s clear when they branch is theirs again.
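+For concreteness, one review cycle might look like this from the command
+line. This is an illustrative sketch only, not required tooling: it assumes
+the GitHub CLI (:code:`gh`) is installed, and the branch and reviewer names
+are made up; the web interface works just as well::
+
+   $ git switch -c fix-widget_1234            # named per convention below
+   $ git push -u origin fix-widget_1234
+   $ gh pr create --draft                     # open the PR as a draft
+   $ gh pr ready                              # mark “ready for review”
+   $ gh pr edit --add-reviewer some-reviewer  # notify your chosen reviewer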
-
 
 Branching and merging
 ---------------------
 
 **Don’t commit directly to master.** Even the project lead doesn’t do this.
 While it may appear that some trivial fixes are being committed to the master
-directly, what’s really happening is that these are prototyped on a branch and
-then fast-forward merged after the tests pass.
+directly, what really happened is that these were prototyped on a branch and
+then fast-forward merged after the tests passed. (Note we no longer do this.)
 
 **Merging to master.** Only the project lead should do this.
 
+**Branch naming convention.** Name the branch with a *brief* summary of the
+issue being fixed — just a couple words — with words separated by hyphens,
+then an underscore and the issue number being addressed. For example, issue
+`#1773 `_ is titled
+“:code:`ch-image build`: :code:`--force=fakeroot` outputs to stderr despite
+:code:`-q`”; the corresponding branch (for `PR #1812
+`_) is called
+:code:`fakeroot-quiet-rhel_1773`. Something even shorter, such as
+:code:`fakeroot_1773`, would have been fine too.
+
+Stand-alone PRs do the same, just without an issue number. For example, `PR
+#1804 `_ is titled “add tab
+completion to :code:`ch-convert`” and the branch is
+:code:`convert-completion`.
+
+It’s okay if the branch name misses a little. For example, if you discover
+during work on a PR that you should close a second issue in the same PR, it’s
+not necessary to add the second issue number to the branch name.
+
 **Branch merge procedure.** Generally, branches are merged in the GitHub web
 interface with the *Squash and merge* button, which is :code:`git merge
 --squash` under the hood. This squashes the branch into a single commit on
-master. Commit message example::
+master.
 
-   PR #268 from @j-ogas: remove ch-docker-run (closes #258)
+The commit message must be the PR number followed by the PR title, e.g.::
 
-If the branch closes multiple issues and it’s reasonable to separate those
-issues into independent commits, then the branch is rebased, interactively
-squashed, and force-pushed into a tidy history with close instructions, then
-merged in the web interface with *Create a merge commit*. Example history and
-commit messages::
-
-   * 18aa2b8 merge PR #254 from @j-ogas and me: Dockerfile.openmpi: use snapshot
-   |\
-   | * 79fa89a upgrade to ibverbs 20.0-1 (closes #250)
-   | * 385ce16 Dockerfile.debian9: use snapshot.debian.org (closes #249)
-   |/
-   * 322df2f ...
+   PR #268: remove ch-docker-run
+
+The commit message should not mention issue numbers; let the PR itself do
+that.
 
 The reason to prefer merge via web interface is that GitHub often doesn’t
 notice merges done on the command line.
 
-After merge, the branch is deleted via the web interface.
+After merge, delete the branch via the web interface.
 
 **Branch history tidiness.** Commit frequently at semantically relevant times,
 and keep in mind that this history will probably be squashed per above. It is
@@ -187,10 +215,10 @@
 rebase works through a stack of commits.
 
 Note that PRs with merge conflicts will generally not be merged. Resolve
-conflicts before asking for merge.
+conflicts before asking for review.
 
-**Remove obsolete branches.** Keep your repo free of old branches with
-:code:`git branch -d` (or :code:`-D`) and :code:`git fetch --prune --all`.
+**Remove obsolete branches.** Keep your repo free of old branches with the
+script :code:`misc/branches-tidy`.
 
 
 Miscellaneous issue and pull request notes
 ------------------------------------------
 
@@ -204,14 +232,14 @@
 awaiting this.
Unlike many projects, we do not automatically close issues just because they’re old. -**Closing PR.** Stale PRs, on the other hand, are to be avoided due to bit +**Closing PRs.** Stale PRs, on the other hand, are to be avoided due to bit rot. We try to either merge or reject PRs in a timely manner. **Re-opening issues.** Closed issues can be re-opened if new information arises, for example a :code:`worksforme` issue with new reproduction steps. -Continuous integration testing ------------------------------- +Continuous integration (CI) testing +----------------------------------- **Quality of testing.** Tagged versions currently get more testing for various reasons. We are working to improve testing for normal commits on master, but @@ -220,12 +248,10 @@ **Cycles budget.** The resource is there for your use, so take advantage of it, but be mindful of the various costs of this compute time. -Things you can do include testing locally first, cancelling jobs you know will +Things you can do include focused local testing, cancelling jobs you know will fail or that won’t give you additional information, and not pushing every -commit (CI tests only the most recent commit in a pushed group). - -**Iterating.** When trying to make CI happy, force-push or squash-merge. Don’t -submit a PR with half a dozen "fix CI" commits. +commit (CI tests only the most recent commit in a pushed group). Avoid making +commits merely to trigger CI. **Purging Docker cache.** :code:`misc/docker-clean.sh` can be used to purge your Docker cache, either by removing all tags or deleting all containers and @@ -237,13 +263,14 @@ -------------- We use the following labels (a.k.a. tags) to organize issues. Each issue (or -stand-alone PR) should have label(s) from every category, with the exception -of disposition which only applies to closed issues. - -Charliecloud team members should label their own issues. Members of the -general public are more than welcome to label their issues if they like, but -in practice this is rare, which is fine. Whoever triages the incoming issue -should add or adjust labels as needed. +stand-alone PR) should have label(s) from each category, with the exception of +disposition which only applies to closed issues. Labels are periodically +validated using a script. + +Charliecloud team members should label their own issues. The general public +are more than welcome to label their issues if they like, but in practice this +is rare, which is fine. Whoever triages the incoming issue should add or +adjust labels as needed. .. note:: @@ -405,6 +432,11 @@ feels like they should be reproducible but we’re missing it somehow; such bugs should be left open in hopes of new insight arising. +.. note:: + + We do not use the GitHub “closed as not planned” feature, so everything is + “closed as completed” even if the reason is one of the above. + Deprecated labels ~~~~~~~~~~~~~~~~~ @@ -607,13 +639,15 @@ Dependencies ------------ - * charliecloud + * Charliecloud * Python 3.6+ - * Either: + * either: - * the provided example :code:`centos_7ch` or :code:`almalinux_8ch` images + * the provided example :code:`centos_7ch` or :code:`almalinux_8ch` images, + or * a RHEL/CentOS 7 or newer container image with (note there are different python version names for the listed packages in RHEL 8 and derivatives): + * autoconf * automake * gcc @@ -778,18 +812,15 @@ additional dependencies that are reasonably expected on most systems where the convenience would be used. 
- * Features that only work if some other software is present (example: the - Docker wrapper scripts) can add dependencies of that other software. + * Features that only work if some other software is present can add + dependencies of that other software (e.g., :code:`ch-convert` depends on + Docker to convert to/from Docker image storage). The test suite is tricky, because we need a test framework and to set up complex test fixtures. We have not yet figured out how to do this at reasonable expense with dependencies as tight as run- and build-time, so there are systems that do support Charliecloud but cannot run the test suite. -Building the documentation needs Sphinx features that have not made their way -into common distributions (i.e., RHEL), so we use recent versions of Sphinx -and provide a source distribution with pre-built documentation. - Building the RPMs should work on RPM-based distributions with a kernel new enough to support Charliecloud. You might need to install additional packages (but not from third-party repositories). @@ -832,14 +863,6 @@ "$foo/bar" # no "${foo}" # no -* Quote the entire string instead of just the variable when practical: - - .. code-block:: none - - "${foo}/bar" # yes - "${foo}"/bar # no - "$foo"/bar # no - * Don’t quote variable assignments or other places where not needed (e.g., case statements). E.g.: @@ -911,7 +934,8 @@ Indentation width ~~~~~~~~~~~~~~~~~ -3 spaces per level. No tab characters. +`3 spaces `_ per level. No tab +characters. C code @@ -1010,6 +1034,62 @@ list_append((void **)list, &bar, sizeof(char *)); // OK +Debugging +========= + +Python :code:`printf(3)`-style debugging +---------------------------------------- + +Consider :code:`ch.ILLERI()`. This uses the same mechanism as the standard +logging functions (:code:`ch.INFO()`, :code:`ch.VERBOSE()`, etc.) but it +(1) cannot be suppressed and (2) uses a color that stands out. + +All :code:`ch.ILLERI()` calls must be removed before a PR can be merged. + +:code:`seccomp(2)` BPF +---------------------- + +:code:`ch-run --seccomp -vv` will log the BPF instructions as they are +computed, but it’s all in raw hex and hard to interpret, e.g.:: + + $ ch-run --seccomp -vv alpine:3.17 -- true + [...] + ch-run[62763]: seccomp: arch c00000b7: found 13 syscalls (ch_core.c:582) + ch-run[62763]: seccomp: arch 40000028: found 27 syscalls (ch_core.c:582) + [...] + ch-run[62763]: seccomp(2) program has 156 instructions (ch_core.c:591) + ch-run[62763]: 0: { op=20 k= 4 jt= 0 jf= 0 } (ch_core.c:423) + ch-run[62763]: 1: { op=15 k=c00000b7 jt= 0 jf= 17 } (ch_core.c:423) + ch-run[62763]: 2: { op=20 k= 0 jt= 0 jf= 0 } (ch_core.c:423) + ch-run[62763]: 3: { op=15 k= 5b jt=145 jf= 0 } (ch_core.c:423) + [...] 
+ ch-run[62763]: 154: { op= 6 k=7fff0000 jt= 0 jf= 0 } (ch_core.c:423) + ch-run[62763]: 155: { op= 6 k= 50000 jt= 0 jf= 0 } (ch_core.c:423) + ch-run[62763]: note: see FAQ to disassemble the above (ch_core.c:676) + ch-run[62763]: executing: true (ch_core.c:538) + +You can instead use `seccomp-tools +`_ to disassemble and pretty-print +the BPF code in a far easier format, e.g.:: + + $ sudo apt install ruby-dev + $ gem install --user-install seccomp-tools + $ export PATH=~/.gem/ruby/3.1.0/bin:$PATH + $ seccomp-tools dump -c 'ch-run --seccomp alpine:3.19 -- true' + line CODE JT JF K + ================================= + 0000: 0x20 0x00 0x00 0x00000004 A = arch + 0001: 0x15 0x00 0x11 0xc00000b7 if (A != ARCH_AARCH64) goto 0019 + 0002: 0x20 0x00 0x00 0x00000000 A = sys_number + 0003: 0x15 0x91 0x00 0x0000005b if (A == aarch64.capset) goto 0149 + [...] + 0154: 0x06 0x00 0x00 0x7fff0000 return ALLOW + 0155: 0x06 0x00 0x00 0x00050000 return ERRNO(0) + +Note that the disassembly is not perfect; e.g. if an architecture is not in +your kernel headers, the system call name is wrong. + + OCI technical notes =================== @@ -1582,49 +1662,7 @@ What to do in each location should either be obvious or commented. -Debugging :code:`seccomp(2)` BPF --------------------------------- - -:code:`ch-run --seccomp -vv` will log the BPF instructions as they are -computed, but it’s all in raw hex and hard to interpret, e.g.:: - - $ ch-run --seccomp -vv alpine:3.19 -- true - [...] - ch-run[62763]: seccomp: arch c00000b7: found 13 syscalls (ch_core.c:582) - ch-run[62763]: seccomp: arch 40000028: found 27 syscalls (ch_core.c:582) - [...] - ch-run[62763]: seccomp(2) program has 156 instructions (ch_core.c:591) - ch-run[62763]: 0: { op=20 k= 4 jt= 0 jf= 0 } (ch_core.c:423) - ch-run[62763]: 1: { op=15 k=c00000b7 jt= 0 jf= 17 } (ch_core.c:423) - ch-run[62763]: 2: { op=20 k= 0 jt= 0 jf= 0 } (ch_core.c:423) - ch-run[62763]: 3: { op=15 k= 5b jt=145 jf= 0 } (ch_core.c:423) - [...] - ch-run[62763]: 154: { op= 6 k=7fff0000 jt= 0 jf= 0 } (ch_core.c:423) - ch-run[62763]: 155: { op= 6 k= 50000 jt= 0 jf= 0 } (ch_core.c:423) - ch-run[62763]: note: see FAQ to disassemble the above (ch_core.c:676) - ch-run[62763]: executing: true (ch_core.c:538) - -You can instead use `seccomp-tools -`_ to disassemble and pretty-print -the BPF code in a far easier format, e.g.:: - - $ sudo apt install ruby-dev - $ gem install --user-install seccomp-tools - $ export PATH=~/.gem/ruby/3.1.0/bin:$PATH - $ seccomp-tools dump -c 'ch-run --seccomp alpine:3.19 -- true' - line CODE JT JF K - ================================= - 0000: 0x20 0x00 0x00 0x00000004 A = arch - 0001: 0x15 0x00 0x11 0xc00000b7 if (A != ARCH_AARCH64) goto 0019 - 0002: 0x20 0x00 0x00 0x00000000 A = sys_number - 0003: 0x15 0x91 0x00 0x0000005b if (A == aarch64.capset) goto 0149 - [...] - 0154: 0x06 0x00 0x00 0x7fff0000 return ALLOW - 0155: 0x06 0x00 0x00 0x00050000 return ERRNO(0) - -Note that the disassembly is not perfect; e.g. if an architecture is not in -your kernel headers, the system call name is wrong. .. LocalWords: milestoned gh nv cht Chacon’s scottchacon mis cantfix tmpimg .. LocalWords: rootfs cbd cae ce bafb bc weirdal yankovic nop cb fbe adb fd -.. LocalWords: abd bbf LOGFILE logfile rtd Enums +.. 
LocalWords: abd bbf LOGFILE logfile rtd Enums WIP rpmlintrc rhel ILLERI diff -Nru charliecloud-0.36/doc/index.rst charliecloud-0.37/doc/index.rst --- charliecloud-0.36/doc/index.rst 2024-01-19 20:44:04.000000000 +0000 +++ charliecloud-0.37/doc/index.rst 2024-02-29 20:52:27.000000000 +0000 @@ -21,6 +21,7 @@ install tutorial ch-checkns + ch-completion.bash ch-convert ch-fromhost ch-image diff -Nru charliecloud-0.36/doc/install.rst charliecloud-0.37/doc/install.rst --- charliecloud-0.36/doc/install.rst 2024-01-19 20:44:04.000000000 +0000 +++ charliecloud-0.37/doc/install.rst 2024-02-29 20:52:27.000000000 +0000 @@ -506,24 +506,13 @@ image used to exercise Charliecloud itself). -Command line completion -======================= +Command line tab completion +=========================== -Charliecloud offers experimental Bash command line completion for -:code:`ch-image`. This feature lets users have incomplete command line -arguments auto-filled by pressing Tab. We expect that in the future, this will -become more robust and available for more shells and more Charliecloud -commands. - -To enable it, once :code:`ch-completion.bash` is in your path, source it:: - - $ source ch-completion.bash - -If it doesn’t work or you just don’t like it, it can be disabled with:: - - $ ch-completion-disable - -In this case, please do submit a bug report so we can make it better. +Charliecloud offers experimental tab completion for Bash users. This feature is +currently implemented for :code:`ch-image`, :code:`ch-run`, and +:code:`ch-convert`. For details on setting up tab completion, as well as general +documentation, see :ref:`ch-completion.bash`. .. LocalWords: Werror Flameeyes plougher deps libc’s ericonr diff -Nru charliecloud-0.36/examples/Dockerfile.almalinux_8ch charliecloud-0.37/examples/Dockerfile.almalinux_8ch --- charliecloud-0.36/examples/Dockerfile.almalinux_8ch 2024-01-19 20:44:04.000000000 +0000 +++ charliecloud-0.37/examples/Dockerfile.almalinux_8ch 2024-02-29 20:52:27.000000000 +0000 @@ -16,15 +16,37 @@ # # 4. Issue #1103: Install libarchive to resolve cmake bug # -# FIXME: This instruction seems to be running afoul of #1679. I’ve re-wrapped -# it to have fewer continuations in hopes we trigger that less. +# 5. AlmaLinux lost their GPG key, so manual intervention is required to +# install current packages [1]. +# +# [1]: https://almalinux.org/blog/2023-12-20-almalinux-8-key-update/ +RUN rpm --import https://repo.almalinux.org/almalinux/RPM-GPG-KEY-AlmaLinux RUN dnf install -y --setopt=install_weak_deps=false \ epel-release \ - 'dnf-command(config-manager)' \ - && dnf config-manager --enable powertools \ - && dnf install -y --setopt=install_weak_deps=false \ + 'dnf-command(config-manager)' +RUN dnf config-manager --enable powertools +RUN dnf install -y --setopt=install_weak_deps=false \ dnf-plugin-ovl \ - autoconf automake gcc git libarchive libpng-devel make python3 python3-devel python3-lark-parser python3-requests python3-sphinx python3-sphinx_rtd_theme rpm-build rpmlint rsync squashfs-tools squashfuse wget which \ + autoconf \ + automake \ + gcc \ + git \ + libarchive \ + libpng-devel \ + make \ + python3 \ + python3-devel \ + python3-lark-parser \ + python3-requests \ + python3-sphinx \ + python3-sphinx_rtd_theme \ + rpm-build \ + rpmlint \ + rsync \ + squashfs-tools \ + squashfuse \ + wget \ + which \ && dnf clean all # Need wheel to install bundled Lark, and the RPM version doesn’t work. 
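For a quick taste of the tab completion feature described above, here is a
minimal sketch (it assumes :code:`ch-completion.bash` is somewhere on your
:code:`$PATH` and that :code:`list` is the only completion candidate; the man
page is authoritative)::

   $ source ch-completion.bash      # enable completion in this shell
   $ ch-image li<TAB>               # e.g., completes to “ch-image list”
   $ ch-completion-disable          # turn completion off again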
diff -Nru charliecloud-0.36/examples/multistage/Dockerfile charliecloud-0.37/examples/multistage/Dockerfile
--- charliecloud-0.36/examples/multistage/Dockerfile	2024-01-19 20:44:04.000000000 +0000
+++ charliecloud-0.37/examples/multistage/Dockerfile	2024-02-29 20:52:27.000000000 +0000
@@ -16,9 +16,18 @@
 WORKDIR /usr/local/src
 
 # GNU Hello. Install using DESTDIR to make copying below easier.
-RUN wget -nv https://ftpmirror.gnu.org/gnu/hello/hello-2.10.tar.gz
-RUN tar xf hello-2.10.tar.gz \
-    && cd hello-2.10 \
+#
+# This downloads from a specific mirror [1] that smelled reliable because both
+# ftp.gnu.org itself and the mirror alias ftpmirror.gnu.org are unreliable.
+# Specifically, ftpmirror.gnu.org frequently ends up at tripadvisor.com, which
+# frequently returns HTTP 500.
+#
+# [1]: https://www.gnu.org/prep/ftp.html
+ARG gnu_mirror=mirrors.kernel.org/gnu
+ARG version=2.12.1
+RUN wget -nv https://${gnu_mirror}/hello/hello-${version}.tar.gz
+RUN tar xf hello-${version}.tar.gz \
+    && cd hello-${version} \
     && ./configure \
     && make -j $(getconf _NPROCESSORS_ONLN) \
     && make install DESTDIR=/hello
diff -Nru charliecloud-0.36/examples/paraview/Dockerfile charliecloud-0.37/examples/paraview/Dockerfile
--- charliecloud-0.36/examples/paraview/Dockerfile	2024-01-19 20:44:04.000000000 +0000
+++ charliecloud-0.37/examples/paraview/Dockerfile	2024-02-29 20:52:27.000000000 +0000
@@ -2,6 +2,8 @@
 FROM openmpi
 WORKDIR /usr/local/src
 
+# The mesa rpms introduce explicit dependencies on python3.11-libs; ParaView
+# will error at configure time unless we provide the python3.11-devel package.
 RUN dnf install -y --setopt=install_weak_deps=false \
     cmake \
     expat-devel \
@@ -11,10 +13,9 @@
     mesa-libGL-devel \
     mesa-libOSMesa \
     mesa-libOSMesa-devel \
-    python3 \
-    python3-devel \
     python3-mako \
     python3-pip \
+    python3.11-devel \
     zlib-devel \
     && dnf clean all
 
@@ -26,8 +27,8 @@
 
 # ParaView. Use system libpng to work around issues linking with NEON specific
 # symbols on ARM.
-ARG PARAVIEW_MAJORMINOR=5.9
-ARG PARAVIEW_VERSION=5.9.1
+ARG PARAVIEW_MAJORMINOR=5.11
+ARG PARAVIEW_VERSION=5.11.2
 RUN wget -nv -O ParaView-v${PARAVIEW_VERSION}.tar.xz "https://www.paraview.org/paraview-downloads/download.php?submit=Download&version=v${PARAVIEW_MAJORMINOR}&type=binary&os=Sources&downloadFile=ParaView-v${PARAVIEW_VERSION}.tar.xz" \
     && tar xf ParaView-v${PARAVIEW_VERSION}.tar.xz \
     && mkdir ParaView-v${PARAVIEW_VERSION}.build \
diff -Nru charliecloud-0.36/examples/seccomp/Dockerfile charliecloud-0.37/examples/seccomp/Dockerfile
--- charliecloud-0.36/examples/seccomp/Dockerfile	2024-01-19 20:44:04.000000000 +0000
+++ charliecloud-0.37/examples/seccomp/Dockerfile	2024-02-29 20:52:27.000000000 +0000
@@ -1,3 +1,4 @@
+# ch-test-scope: standard
 # ch-test-builder-include: ch-image
 FROM alpine:3.17
 RUN apk add gcc musl-dev strace
diff -Nru charliecloud-0.36/examples/spack/Dockerfile charliecloud-0.37/examples/spack/Dockerfile
--- charliecloud-0.36/examples/spack/Dockerfile	2024-01-19 20:44:04.000000000 +0000
+++ charliecloud-0.37/examples/spack/Dockerfile	2024-02-29 20:52:27.000000000 +0000
@@ -1,3 +1,4 @@
+# ch-test-scope: full
 FROM almalinux:8
 
 # Note: Spack is a bit of an odd duck testing wise.
Because it’s a package diff -Nru charliecloud-0.36/lib/base.sh charliecloud-0.37/lib/base.sh --- charliecloud-0.36/lib/base.sh 2024-01-19 20:44:04.000000000 +0000 +++ charliecloud-0.37/lib/base.sh 2024-02-29 20:52:27.000000000 +0000 @@ -104,15 +104,6 @@ return 1 # not a basic arg } -parse_basic_args () { - if [ "$#" -eq 0 ]; then - usage 1 - fi - for i in "$@"; do - parse_basic_arg "$i" || true - done -} - # Redirect standard streams (or not) depending on “quiet” level. See table in # FAQ. quiet () { diff -Nru charliecloud-0.36/lib/build.py charliecloud-0.37/lib/build.py --- charliecloud-0.36/lib/build.py 2024-01-19 20:44:04.000000000 +0000 +++ charliecloud-0.37/lib/build.py 2024-02-29 20:52:27.000000000 +0000 @@ -210,7 +210,7 @@ ch.VERBOSE(x) # noise about what was expected in the grammar ch.FATAL("can’t parse: %s:%d,%d\n\n%s" % (cli.file, x.line, x.column, x.get_context(text, 39))) - ch.VERBOSE(tree.pretty()) + ch.VERBOSE(tree.pretty()[:-1]) # rm trailing newline # Sometimes we exit after parsing. if (cli.parse_only): diff -Nru charliecloud-0.36/lib/build_cache.py charliecloud-0.37/lib/build_cache.py --- charliecloud-0.36/lib/build_cache.py 2024-01-19 20:44:04.000000000 +0000 +++ charliecloud-0.37/lib/build_cache.py 2024-02-29 20:52:27.000000000 +0000 @@ -907,7 +907,7 @@ i += 1 text = "\n".join(lines) text = re.sub(r"^(D|M [0-7]+ [0-9a-f]+) \.(git|weirdal_)ignore$", - "#\g<0>", text, flags=re.MULTILINE) + r"#\g<0>", text, flags=re.MULTILINE) #fs.Path("/tmp/new").file_write(text) self.git(["fast-import", "--force"], input=text) self.git(["reflog", "expire", "--all", "--expire=now"]) @@ -1324,6 +1324,15 @@ // "*" // im.GIT_DIR)) } wt_gits = { fs.Path(i).name for i in glob.iglob("%s/worktrees/*" % self.root) } + # Unlink images that think they are in Git but are not. This should not + # happen, but it does, and I wasn’t able to figure out how it happened. + wt_gits_orphaned = wt_actuals - wt_gits + for img_dir in wt_gits_orphaned: + link = ch.storage.unpack_base // img_dir // im.GIT_DIR + ch.WARNING("image erroneously marked cached, fixing: %s" % link, + ch.BUG_REPORT_PLZ) + link.unlink() + wt_actuals -= wt_gits_orphaned # Delete worktree data for images that no longer exist or aren’t # Git-enabled any more. wt_gits_deleted = wt_gits - wt_actuals @@ -1331,7 +1340,12 @@ (ch.storage.build_cache // "worktrees" // wt).rmtree() ch.VERBOSE("deleted %d stale worktree metadatas" % len(wt_gits_deleted)) wt_gits -= wt_gits_deleted - assert (wt_gits == wt_actuals) + # Validate that the pointers are in sync now. + if (wt_gits != wt_actuals): + ch.ERROR("found images -> cache links: %s" % " ".join(wt_actuals)) + ch.ERROR("found cache -> images links: %s" % " ".join(wt_gits)) + ch.FATAL("build cache is desynchronized, cannot proceed", + ch.BUG_REPORT_PLZ) # If storage directory moved, repair all the paths. if (len(wt_gits) > 0): wt_dir_stored = fs.Path(( ch.storage.build_cache diff -Nru charliecloud-0.36/lib/charliecloud.py charliecloud-0.37/lib/charliecloud.py --- charliecloud-0.36/lib/charliecloud.py 2024-01-19 20:44:04.000000000 +0000 +++ charliecloud-0.37/lib/charliecloud.py 2024-02-29 20:52:27.000000000 +0000 @@ -20,6 +20,7 @@ import sys import time import traceback +import warnings # List of dependency problems. This variable needs to be created before we @@ -149,7 +150,7 @@ trace_fatal = False # Add abbreviated traceback to fatal error hint. # Warnings to be re-printed when program exits -warnings = list() +warns = list() # True if the download cache is enabled. 
dlcache_p = None @@ -158,6 +159,9 @@ profiling = False profile = None +# Width of terminal. +term_width = shutil.get_terminal_size(fallback=(sys.maxsize, -1))[0] + ## Exceptions ## @@ -478,6 +482,10 @@ tr = None raise Fatal_Error(msg, hint, tr, **kwargs) +def ILLERI(msg, hint=None, **kwargs): + # For temporary debugging only. See contributors’ guide. + log(msg, hint, None, "38;5;207m", "", **kwargs) # hot pink + def INFO(msg, hint=None, **kwargs): "Note: Use print() for output; this function is for logging." if (log_level >= Log_Level.INFO): @@ -494,7 +502,7 @@ def WARNING(msg, hint=None, msg_save=True, **kwargs): if (log_level > Log_Level.STDERR): if (msg_save): - warnings.append(msg) + warns.append(msg) log(msg, hint, None, "31m", "warning: ", **kwargs) # red def arch_host_get(): @@ -714,6 +722,12 @@ else: rg.auth_p = False VERBOSE("registry authentication: %s" % rg.auth_p) + # Red Hat Python warns about tar bugs, citing CVE-2007-4559. + # We mitigate this already, so suppress the noise. (#1818) + warnings.filterwarnings("ignore", module=r"^tarfile$", + message=( "^The default behavior of tarfile" + + " extraction has been changed to disallow" + + " common exploits")) # misc global password_many, profiling password_many = cli.password_many @@ -930,7 +944,7 @@ [fs.Path(filename) for filename in filenames]) def warnings_dump(): - if (len(warnings) > 0): - WARNING("reprinting %d warning(s)" % len(warnings), msg_save=False) - for msg in warnings: + if (len(warns) > 0): + WARNING("reprinting %d warning(s)" % len(warns), msg_save=False) + for msg in warns: WARNING(msg, msg_save=False) diff -Nru charliecloud-0.36/lib/force.py charliecloud-0.37/lib/force.py --- charliecloud-0.36/lib/force.py 2024-01-19 20:44:04.000000000 +0000 +++ charliecloud-0.37/lib/force.py 2024-02-29 20:52:27.000000000 +0000 @@ -183,8 +183,8 @@ { "name": "RHEL 7 and derivatives", "match": ("/etc/redhat-release", r"release 7\."), "init": [ ("command -v fakeroot > /dev/null", - "set -ex; " - "if ! grep -Eq '\[epel\]' /etc/yum.conf /etc/yum.repos.d/*; then " + "set -e; " + r"if ! grep -Eq '\[epel\]' /etc/yum.conf /etc/yum.repos.d/*; then " "yum install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm; " "yum install -y fakeroot; " "yum remove -y epel-release; " @@ -198,8 +198,8 @@ { "name": "RHEL 8+ and derivatives", "match": ("/etc/redhat-release", r"release (?![0-7]\.)"), "init": [ ("command -v fakeroot > /dev/null", - "set -ex; " - "if ! grep -Eq '\[epel\]' /etc/yum.conf /etc/yum.repos.d/*; then " + "set -e; " + r"if ! grep -Eq '\[epel\]' /etc/yum.conf /etc/yum.repos.d/*; then " # Macro %rhel from *-release* RPM, e.g. redhat-release-server # or centos-linux-release; thus reliable. "dnf install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-$(rpm -E %rhel).noarch.rpm; " diff -Nru charliecloud-0.36/lib/image.py charliecloud-0.37/lib/image.py --- charliecloud-0.36/lib/image.py 2024-01-19 20:44:04.000000000 +0000 +++ charliecloud-0.37/lib/image.py 2024-02-29 20:52:27.000000000 +0000 @@ -75,7 +75,7 @@ _WSH: /[ \t]/+ // sequence of horizontal whitespace _LINE_CONTINUE: "\\" _WSH? "\n" // line continuation _WS: ( _WSH | _LINE_CONTINUE )+ // horizontal whitespace w/ line continuations -_NEWLINES: ( _WSH? "\n" )+ // sequence of newlines +_NEWLINES: ( _WS? 
"\n" )+ // sequence of newlines %import common.ESCAPED_STRING -> STRING_QUOTED """ @@ -120,7 +120,7 @@ env_equalses: env_equals ( _WS env_equals )* env_equals: WORD "=" ( WORD | STRING_QUOTED ) -from_: "FROM"i ( _WS ( option | option_keypair ) )* _WS image_ref [ _WS from_alias ] _NEWLINES +from_: "FROM"i ( _WS ( option | option_keypair ) )* _WS image_ref ( _WS from_alias )? _NEWLINES from_alias: "AS"i _WS IR_PATH_COMPONENT // FIXME: undocumented; this is guess label: "LABEL"i _WS ( label_space | label_equalses ) _NEWLINES @@ -181,6 +181,9 @@ # Top-level directories we create if not present. STANDARD_DIRS = { "bin", "dev", "etc", "mnt", "proc", "sys", "tmp", "usr" } +# Width of token name when truncating text to fit on screen. +WIDTH_TOKEN_MAX = 10 + ## Classes ## @@ -558,6 +561,7 @@ # Correct absolute paths. if (m.name.is_absolute()): m.name = m.name.relative_to("/") + abs_ct += 1 # Record top-level directory. if (len(m.name.parts) > 1 or m.isdir()): top_dirs.add(m.name.first) @@ -863,6 +867,29 @@ class Tree(lark.tree.Tree): + def _pretty(self, level, istr): + # Re-implement with less space optimization and more debugging info. + # See: https://github.com/lark-parser/lark/blob/262ab71/lark/tree.py#L78 + pfx = "%4d %3d%s" % (self.meta.line, self.meta.column, istr*(level+1)) + yield (pfx + self._pretty_label() + "\n") + for c in self.children: + if (isinstance(c, Tree)): + yield from c._pretty(level + 1, istr) + else: + text = c + type_ = c.type + width = len(pfx) + len(istr) + len(text) + len(type_) + 2 + over = width - ch.term_width + if (len(type_) > WIDTH_TOKEN_MAX): + # trim token (unconditionally for consistent alignment) + token_rm = len(type_) - WIDTH_TOKEN_MAX + type_ = type_[:-token_rm] + over -= token_rm + if (over > 0): + # trim text (if needed) + text = text[:-(over + 3)] + "..." + yield "%s%s %s %s\n" % (pfx, istr, type_, text) + def child(self, cname): """Locate a descendant subtree named cname using breadth-first search and return it. If no such subtree exists, return None.""" diff -Nru charliecloud-0.36/lib/registry.py charliecloud-0.37/lib/registry.py --- charliecloud-0.36/lib/registry.py 2024-01-19 20:44:04.000000000 +0000 +++ charliecloud-0.37/lib/registry.py 2024-02-29 20:52:27.000000000 +0000 @@ -1,4 +1,5 @@ import getpass +import hashlib import io import os import re @@ -395,7 +396,7 @@ # /v2/library/hello-world/blobs/ url = self._url_of("blobs", "sha256:" + digest) sw = ch.Progress_Writer(path, msg) - self.request("GET", url, out=sw) + self.request("GET", url, out=sw, hd=digest) sw.close() def blob_upload(self, digest, data, note=""): @@ -518,17 +519,19 @@ self.request("PUT", url, {201}, data=manifest, headers={ "Content-Type": TYPES_MANIFEST["docker2"] }) - def request(self, method, url, statuses={200}, out=None, **kwargs): + def request(self, method, url, statuses={200}, out=None, hd=None, **kwargs): """Request url using method and return the response object. If statuses is given, it is set of acceptable response status codes, defaulting to {200}; any other response is a fatal error. If out is given, response content will be streamed to this Progress_Writer object and - must be non-zero length. + must be non-zero length. If hd is given, validate integrity of + downloaded data using expected hash digest. Use current session if there is one, or start a new one if not. If authentication fails (or isn’t initialized), then authenticate harder and re-try the request.""" # Set up. 
+ assert (out or hd is None), "digest only checked if streaming" self.session_init_maybe() ch.VERBOSE("auth: %s" % self.auth) if (out is not None): @@ -547,6 +550,7 @@ else: ch.FATAL("unhandled authentication failure") # Stream response if needed. + m = hashlib.sha256() if (out is not None and res.status_code == 200): try: length = int(res.headers["Content-Length"]) @@ -557,6 +561,10 @@ out.start(length) for chunk in res.iter_content(ch.HTTP_CHUNK_SIZE): out.write(chunk) + m.update(chunk) # store downloaded hash digest + # Validate integrity of downloaded data + if (hd is not None and hd != m.hexdigest()): + ch.FATAL("registry streamed response content is invalid") # Done. return res diff -Nru charliecloud-0.36/packaging/fedora/charliecloud.spec charliecloud-0.37/packaging/fedora/charliecloud.spec --- charliecloud-0.36/packaging/fedora/charliecloud.spec 2024-01-19 20:44:04.000000000 +0000 +++ charliecloud-0.37/packaging/fedora/charliecloud.spec 2024-02-29 20:52:27.000000000 +0000 @@ -136,6 +136,7 @@ %{_mandir}/man1/ch-run.1* %{_mandir}/man1/ch-run-oci.1* %{_mandir}/man7/charliecloud.7* +%{_mandir}/man7/ch-completion.bash.7* %{_prefix}/lib/%{name}/base.sh %{_prefix}/lib/%{name}/contributors.bash %{_prefix}/lib/%{name}/version.sh diff -Nru charliecloud-0.36/test/build/55_cache.bats charliecloud-0.37/test/build/55_cache.bats --- charliecloud-0.36/test/build/55_cache.bats 2024-01-19 20:44:04.000000000 +0000 +++ charliecloud-0.37/test/build/55_cache.bats 2024-02-29 20:52:27.000000000 +0000 @@ -26,9 +26,13 @@ } -### Test cases that go in the paper ### +### Test cases for build cache paper figures (DOI: 10.1145/3624062.3624585) ### -@test "${tag}: §3.1 empty cache" { +# Not all of these ended up as figures in the published paper, but I’m leaving +# them here because they were targeted to the paper and were used in some +# versions. If they are in the published paper, the figure number is noted. + +@test "${tag}: Fig. 2: empty cache" { rm -Rf --one-file-system "$CH_IMAGE_STORAGE" blessed_tree=$(cat << EOF @@ -44,7 +48,7 @@ } -@test "${tag}: §3.2.1 initial pull" { +@test "${tag}: Fig. 3: initial pull" { ch-image pull alpine:3.17 blessed_tree=$(cat << 'EOF' @@ -59,7 +63,7 @@ } -@test "${tag}: §3.5 FROM" { +@test "${tag}: FROM" { # FROM pulls ch-image build-cache --reset run ch-image build -v -t d -f bucache/from.df . @@ -108,7 +112,7 @@ } -@test "${tag}: §3.3.1 Dockerfile A" { +@test "${tag}: Fig. 4: a.df" { ch-image build-cache --reset ch-image build -t a -f bucache/a.df . @@ -127,7 +131,7 @@ } -@test "${tag}: §3.3.2 Dockerfile B" { +@test "${tag}: Fig. 5: b.df" { ch-image build-cache --reset ch-image build -t a -f bucache/a.df . @@ -148,7 +152,7 @@ } -@test "${tag}: §3.3.3 Dockerfile C" { +@test "${tag}: Fig. 6: c.df" { ch-image build-cache --reset ch-image build -t a -f bucache/a.df . @@ -173,7 +177,7 @@ } -@test "${tag}: rebuild A" { +@test "${tag}: rebuild a.df" { # Forcing a rebuild show produce a new pair of FOO and BAR commits from # from the alpine branch. blessed_out=$(cat << 'EOF' @@ -197,7 +201,7 @@ } -@test "${tag}: rebuild B" { +@test "${tag}: rebuild b.df" { # Rebuild of B. Since A was rebuilt in the last test, and because # the rebuild behavior only forces misses on non-FROM instructions, it # should now be based on A's new commits. @@ -222,7 +226,7 @@ } -@test "${tag}: rebuild C" { +@test "${tag}: c.df" { # Rebuild C. Since C doesn’t reference img_a (like img_b does) rebuilding # causes a miss on FOO. Thus C makes new FOO and QUX commits. 
 #
@@ -255,7 +259,7 @@
 }
 
-@test "${tag}: §3.7 change then revert" {
+@test "${tag}: Fig. 7: change then revert" {
    ch-image build-cache --reset
 
    ch-image build -t e -f bucache/a.df .
@@ -296,7 +300,7 @@
 }
 
-@test "${tag}: §3.4.1 two pulls, same" {
+@test "${tag}: two pulls, same" {
    ch-image build-cache --reset
 
    ch-image pull alpine:3.17
    ch-image pull alpine:3.17
@@ -313,7 +317,7 @@
 }
 
-@test "${tag}: §3.4.2 two pulls, different" {
+@test "${tag}: two pulls, different" {
    localregistry_init
 
    unset CH_IMAGE_AUTH  # don’t give local creds to Docker Hub
@@ -561,7 +565,7 @@
 }
 
-@test "${tag}: §3.6 rebuild" {
+@test "${tag}: Fig. 8: rebuild" {
    ch-image build-cache --reset
 
    # Build. Mode should not matter here, but we use enabled because that’s
@@ -1100,6 +1104,7 @@
    diff -u <(echo "$blessed_out") <(echo "$output" | treeonly)
 }
 
+
 @test "${tag}: multistage COPY" {
    # Multi-stage build with no instructions in the first stage.
    df_no=$(cat <<'EOF'
@@ -1502,6 +1507,7 @@
    [[ ! -e $CH_IMAGE_STORAGE/img/tmpimg/var/lib/rpm/__db.001 ]]
 }
 
+
 @test "${tag}: restore ACLs, xattrs" {  # issue #1287
    # Check if test needs to be skipped
    touch "$BATS_TMPDIR/tmpfs_test"
@@ -1538,3 +1544,39 @@
    [[ $status -eq 0 ]]
    [[ $output = *"user:$USER:r--"* ]]
 }
+
+
+@test "${tag}: orphaned worktrees" {  # PR #1824
+   img_metadata=$CH_IMAGE_STORAGE/img/tmpimg/ch
+   img_to_git=$img_metadata/git
+   git_worktrees=$CH_IMAGE_STORAGE/bucache/worktrees
+   git_to_img=$git_worktrees/tmpimg
+
+   # pull image, should be unlinked
+   ch-image pull --no-cache scratch tmpimg
+   ch-image build-cache  # rm leftover $git_to_img if it exists
+   ls -lh "$img_metadata" "$git_worktrees"
+   [[ ! -e "$img_to_git" ]]
+   [[ ! -e "$git_to_img" ]]
+
+   # add fake link
+   touch "$img_to_git"
+   ls -lh "$img_metadata" "$git_worktrees"
+   [[ -e "$img_to_git" ]]
+   [[ ! -e "$git_to_img" ]]
+
+   # ch-image should warn and fix instead of crashing
+   run ch-image list
+   echo "$output"
+   [[ $status -eq 0 ]]
+   [[ $output = *'image erroneously marked cached, fixing'* ]]
+
+   # warning should now be gone and the state be good
+   ls -lh "$img_metadata" "$git_worktrees"
+   [[ ! -e "$img_to_git" ]]
+   [[ ! -e "$git_to_img" ]]
+   run ch-image list
+   echo "$output"
+   [[ $status -eq 0 ]]
+   [[ $output != *'image erroneously marked cached, fixing'* ]]
+}
diff -Nru charliecloud-0.36/test/docs-sane.py.in charliecloud-0.37/test/docs-sane.py.in
--- charliecloud-0.36/test/docs-sane.py.in	2024-01-19 20:44:04.000000000 +0000
+++ charliecloud-0.37/test/docs-sane.py.in	2024-02-29 20:52:27.000000000 +0000
@@ -8,16 +8,22 @@
 #
 #   a. man/charliecloud.7 exists.
 #
-#   b. Every executable FOO in bin has:
+#   b. The correct files FOO in bin have:
 #
 #      - doc/FOO.rst
-#      - doc/man/FOO.1
+#      - doc/man/FOO.N
 #      - an entry under “See also” in charliecloud.7
 #
-#   c. There aren’t the things in (b) except for the executables (modulo a
-#      few execeptions for the other documentation source files).
+#      Where “N” is the appropriate man section number (e.g. 1 for
+#      executables). Currently, the “correct” files in bin are:
 #
-#   d. Summary in “FOO --help” matches FOO.rst and conf.py.
+#      - All executables
+#      - ch-completion.bash
+#
+#   c. There aren’t any unexpected .rst files, man files, or charliecloud.7
+#      “See also” entries.
+#
+#   d. Synopsis in “FOO --help” (if applicable) matches FOO.rst and conf.py.
 
 from __future__ import print_function
 
@@ -27,6 +33,13 @@
 import glob
 import os.path
 import re
 import subprocess
 import sys
 
+# Dict of documentation files. Executables are added in “main()”. Files that
+# are not executables should be manually added here.
+man_targets = {"charliecloud": {"synopsis": "",
+                                "sec": 7},
+               "ch-completion.bash":
+                  {"synopsis": "Tab completion for the Charliecloud command line.",
+                   "sec": 7}}
+
 CH_BASE = os.path.abspath(os.path.dirname(__file__) + "/..")
 if (not os.path.isfile("%s/bin/ch-run" % CH_BASE)):
@@ -44,43 +57,63 @@
    else:
       sys.exit(1)
 
+# This is the function that actually performs the sanity check for the docs
+# (see the comment at the top of this file).
 def check_man():
+   global man_targets
+   # Add entries for executables to “man_targets”. “sec” is set to 1,
+   # “synopsis” is set using the executable’s “--help” option (see
+   # “help_get”). Note that this code assumes that a file is an executable if
+   # the execute bit is set for any permission group.
    os.chdir(CH_BASE + "/bin")
+   for f in os.listdir("."):
+      if (os.path.isfile(f) and os.stat(f).st_mode & 0o111):
+         man_targets[f] = {"synopsis": help_get(f), "sec": 1}
 
-   execs = { f for f in os.listdir(".")
-             if (os.path.isfile(f) and os.stat(f).st_mode & 0o111) }
-   helps = { x: help_get(x) for x in execs }
-
+   # Check that all the expected .rst files are in doc/ and that no unexpected
+   # .rst files are present.
    os.chdir(CH_BASE + "/doc")
-
-   man_rsts = set(glob.glob("ch-*.rst"))
-   man_rsts_expected = { i + ".rst" for i in execs }
+   man_rsts = set(glob.glob("ch*.rst"))
+   man_rsts_expected = { i + ".rst" for i in man_targets }
    lose_lots("unexpected .rst", man_rsts - man_rsts_expected)
    lose_lots("missing .rst", man_rsts_expected - man_rsts)
 
+   # Construct a dictionary of synopses from the .rst files in doc. We’ll
+   # compare these against the synopses in “man_targets”, which have either
+   # been entered manually (for non-executables), or obtained from the help
+   # message (for executables).
    man_synopses = dict()
-   for man in execs:
+   for man in man_targets:
       m = re.search(r"^\s+(.+)$\n\n\n^Synopsis",
                     open(man + ".rst").read(), re.MULTILINE)
       if (m is not None):
         man_synopses[man] = m[1]
+      elif (man_targets[man]["synopsis"] == ""):
+         # No synopsis expected.
+         man_synopses[man] = ""
+
+   # Check for missing or unexpected synopses.
+   lose_lots("missing synopsis", set(man_targets) - set(man_synopses))
+   lose_lots("unexpected synopsis", set(man_synopses) - set(man_targets))
 
-   lose_lots("missing synopsis", execs - set(man_synopses))
-
+   # Check for synopses that don’t match the expectation provided in
+   # “man_targets”.
    lose_lots("bad synopsis in man page",
-             { "%s: %s (expected: %s)" % (p, s, helps[p])
+             { "%s: %s (expected: %s)" % (p, s, man_targets[p]["synopsis"])
                for (p, s) in man_synopses.items()
-                if (    p in helps
-                    and summary_unrest(s) != helps[p])
+                if (    p in man_targets
+                    and summary_unrest(s) != man_targets[p]["synopsis"])
                    and "deprecated" not in s.lower() })
 
-   sees = { m[0] for m in re.finditer(r"ch-[a-z0-9-]+\(1\)",
+   # Check for “see also” entries in charliecloud.rst.
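+   # (Each page must be cited there as “ch-foo(N)”; the pattern below accepts
+   # dots in command names and any man section 1 through 8, e.g.
+   # “ch-completion.bash(7)”.)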
+ sees = { m[0] for m in re.finditer(r"ch-[a-z0-9-.]+\([1-8]\)", open("charliecloud.rst").read()) } - sees_expected = { i + "(1)" for i in execs } + sees_expected = { i + "(%d)" % (man_targets[i]["sec"]) for i in man_targets } - {"charliecloud(7)"} lose_lots("unexpected see-also in charliecloud.rst", sees - sees_expected) lose_lots("missing see-also in charliecloud.rst", sees_expected - sees) + # Check for consistency with “conf.py” conf = {} execfile("./conf.py", conf) for (docname, name, desc, authors, section) in conf["man_pages"]: @@ -89,20 +122,18 @@ if (len(authors) != 0): lose("conf.py: bad authors: %s: %s" % (name, authors)) if (name != "charliecloud"): - if (section != 1): - lose("conf.py: bad section: %s: %s != 1" % (name, section)) - if (name not in helps): + if (section != man_targets[name]["sec"]): + lose("conf.py: bad section: %s: %s != %d" % (name, section, man_targets[name]["sec"])) + if (name not in man_targets): lose("conf.py: unexpected man page: %s" % name) - elif (desc + "." != helps[name] and "deprecated" not in desc.lower()): + elif (desc + "." != man_targets[name]["synopsis"] and "deprecated" not in desc.lower()): lose("conf.py: bad summary: %s: %s" % (name, desc)) - else: - if (section != 7): - lose("conf.py: bad section: %s: %s != 7" % (name, section)) + # Check that all expected man pages are present, and *only* the expected man + # pages. os.chdir(CH_BASE + "/doc/man") - - mans = set(glob.glob("*.1")) - mans_expected = { i + ".1" for i in execs } + mans = set(glob.glob("*.[1-8]")) + mans_expected = { i + ".%d" % (man_targets[i]["sec"]) for i in man_targets} lose_lots("unexpected man", mans - mans_expected) lose_lots("missing man", mans_expected - mans) @@ -116,7 +147,10 @@ code = compile(fp.read(), path, "exec") exec(code, globals_) +# Get an executable’s synopsis from its help message. def help_get(prog): + if (not os.path.isfile(prog)): + lose("not a file: %s" % prog) try: out = subprocess.check_output(["./" + prog, "--help"], universal_newlines=True, diff -Nru charliecloud-0.36/test/old-storage charliecloud-0.37/test/old-storage --- charliecloud-0.36/test/old-storage 2024-01-19 20:44:04.000000000 +0000 +++ charliecloud-0.37/test/old-storage 2024-02-29 20:52:27.000000000 +0000 @@ -77,6 +77,10 @@ oldtars=$(printf '%s ' "$@") # https://www.shellcheck.net/wiki/SC2124 fi +summary='' +pass_ct=0 +fail_ct=0 + ### Main loop @@ -113,9 +117,31 @@ ch-image pull archlinux:latest INFO "testing" - ch-test -b ch-image --pedantic=no -s "$scope" all + if (ch-test -b ch-image --pedantic=no -s "$scope" all); then + pass_ct=$((pass_ct + 1)) + summary+="😁 ${oldtar}: PASS"$'\n' + else + fail_ct=$((fail_ct + 1)) + summary+="🤦 ${oldtar}: FAIL"$'\n' + fi + INFO "deleting: $storage" rm -Rf --one-file-system "$storage" [[ ! -d $storage ]] done + +cat <