Skip to content

Implement more TODOs in oci-validate code #125

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 19, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions helpers/oci-import.sh
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ jq -L"$BASHBREW_META_SCRIPTS" --slurp --tab '
else . end

| .mediaType //= media_type_oci_index # TODO index normalize function? just force this to be set/valid instead?
| validate_oci_index
| validate_oci_index({ indexPlatformsOptional: true })
| validate_length(.manifests; 1) # TODO allow upstream attestation in the future?

# purge maintainer-provided URLs / annotations (https://github.com/docker-library/bashbrew/blob/4e0ea8d8aba49d54daf22bd8415fabba65dc83ee/cmd/bashbrew/oci-builder.go#L146-L147)
Expand All @@ -123,7 +123,6 @@ jq -L"$BASHBREW_META_SCRIPTS" --slurp --tab '
$build
| .source.arches[.build.arch].platform
)
# TODO .manifests[1].platform ?

# inject our build annotations
| .manifests[0].annotations += (
Expand All @@ -136,8 +135,6 @@ jq -L"$BASHBREW_META_SCRIPTS" --slurp --tab '
' "$file" | tee index.json.new
mv -f index.json.new index.json

# TODO "crane validate" is definitely interesting here -- it essentially validates all the descriptors recursively, including diff_ids, but it only supports "remote" or "tarball" (which refers to the *old* "docker save" tarball format), so isn't useful here, but we need to do basically that exact work

# now that "index.json" represents the exact index we want to push, let's push it down into a blob and make a new appropriate "index.json" for "crane push"
# TODO we probably want/need some "traverse/manipulate an OCI layout" helpers 😭
mediaType="$(jq --raw-output '.mediaType' index.json)"
Expand All @@ -159,3 +156,6 @@ jq -L"$BASHBREW_META_SCRIPTS" --null-input --tab '
}
| normalize_manifest
' > index.json

# TODO move this further out
"$BASHBREW_META_SCRIPTS/helpers/oci-validate.sh" .
7 changes: 5 additions & 2 deletions helpers/oci-sbom.sh
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ cd "$output"
imageIndex="$(jq -L"$BASHBREW_META_SCRIPTS" --raw-output '
include "oci";
include "validate";
validate_oci_index
validate_oci_index({ indexPlatformsOptional: true })
| validate_length(.manifests; 1)
| validate_IN(.manifests[0].mediaType; media_types_index)
| .manifests[0].digest
Expand Down Expand Up @@ -91,7 +91,7 @@ done
sbomIndex="$(jq -L"$BASHBREW_META_SCRIPTS" --raw-output '
include "oci";
include "validate";
validate_oci_index
validate_oci_index({ indexPlatformsOptional: true })
| validate_length(.manifests; 1)
| validate_IN(.manifests[0].mediaType; media_types_index)
| .manifests[0].digest
Expand Down Expand Up @@ -146,3 +146,6 @@ jq -L"$BASHBREW_META_SCRIPTS" --null-input --tab '
}
| normalize_manifest
' > index.json

# TODO move this further out
"$BASHBREW_META_SCRIPTS/helpers/oci-validate.sh" .
159 changes: 104 additions & 55 deletions helpers/oci-validate.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env bash
set -Eeuo pipefail

# given an OCI image layout (https://github.com/opencontainers/image-spec/blob/v1.1.1/image-layout.md), verifies all descriptors as much as possible (digest matches content, size, some media types, layer diff_ids, etc)
# given an OCI image layout (https://github.com/opencontainers/image-spec/blob/v1.1.1/image-layout.md), verifies all descriptors as much as possible (digest matches content, size, media types, layer diff_ids, etc)

layout="$1"; shift

Expand All @@ -23,85 +23,134 @@ jq -L"$BASHBREW_META_SCRIPTS" --slurp '
| empty
' oci-layout

# TODO this is all rubbish; it needs more thought (the jq functions it invokes are pretty solid now though)
# TODO (recursively?) validate subject descriptors in here somewhere 🤔

# TODO handle objects that *only* exist in the "data" field too 🤔 https://github.com/docker-library/meta-scripts/pull/125#discussion_r2070633122
# maybe descriptor takes a "--data" flag that then returns the input descriptor, but enhanced with a "data" field so the other functions can use that to extract the data instead of relying on files?

descriptor() {
local file="$1"; shift # "blobs/sha256/xxx"
echo "blob: $file"
local digest="$1"; shift # "sha256:xxx"
local size="$1"; shift # "123"
local algo="${digest%%:*}" # sha256
local hash="${digest#$algo:}" # xxx
local diskSize
[ "$algo" = 'sha256' ] # TODO error message
diskSize="$(stat --dereference --format '%s' "$file")"
[ "$size" = "$diskSize" ] # TODO error message
"${algo}sum" <<<"$hash *$file" --check --quiet --strict -
local desc; desc="$(cat)"
local shell
shell="$(jq <<<"$desc" -L"$BASHBREW_META_SCRIPTS" --slurp --raw-output '
include "validate";
include "oci";
validate_one
| validate_oci_descriptor
| (
@sh "local algo=\(
.digest
| split(":")[0]
| validate_IN(.; "sha256", "sha512") # TODO more algorithms? need more tools on the host
)",

@sh "local data=\(
if has("data") then
.data
else " " end # empty string is valid base64 (which we should validate), but spaces are not, so we can use a single space to detect "data not set"
)",

empty
)
')"
eval "$shell"
local digest size dataDigest= dataSize=
digest="$("${algo}sum" "$file" | cut -d' ' -f1)"
digest="$algo:$digest"
size="$(stat --dereference --format '%s' "$file")"
if [ "$data" != ' ' ]; then
dataDigest="$(base64 <<<"$data" -d | "${algo}sum" | cut -d' ' -f1)"
dataDigest="$algo:$dataDigest"
dataSize="$(base64 <<<"$data" -d | wc --bytes)"
# TODO *technically* we could get clever here and pass `base64 -d` to something like `tee >(wc --bytes) >(dig="$(sha256sum | cut -d' ' -f1)" && echo "sha256:$dig" && false) > /dev/null` to avoid parsing the base64 twice, but then failure cases are less likely to be caught, so it's safer to simply redecode (and we can't decode into a variable because this might be binary data *and* bash will do newline munging in both directions)
fi
Comment on lines +57 to +66
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be able to handle a descriptor that only has data and not also a file on disk? Or is that not a valid OCI layout?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is such an (unintentionally) brutal comment 😂 😭

You're absolutely right, and you're making me question whether I like this interface again.

Implementations worried about portability should absolutely have both data and the blob in the blobs/ folder, but it's not technically invalid to omit the blob and carry only data, and I'd love to support that. 🤔

jq <<<"$desc" -L"$BASHBREW_META_SCRIPTS" --slurp --arg digest "$digest" --arg size "$size" --arg dataDigest "$dataDigest" --arg dataSize "$dataSize" '
include "validate";
validate_one
| validate_IN(.digest; $digest)
| validate_IN(.size; $size | tonumber)
| if has("data") then
validate(.data;
$digest == $dataDigest
and $size == $dataSize
; "(decoded) data has size \($dataSize) and digest \($dataDigest) (expected \($size) and \($digest))")
else . end
| empty
'
}

images() {
echo "image: $*"
# TODO validate config (diff_ids, history, platform - gotta carry *two* levels of descriptors for that, and decompress all the layers 🙊)
# TODO validate provenance/SBOM layer contents?

image() {
local file="$1"; shift
echo "image: $file"
local desc; desc="$(cat)"
descriptor <<<"$desc" "$file"
local shell
shell="$(
jq -L"$BASHBREW_META_SCRIPTS" --arg expected "$#" --slurp --raw-output '
jq <<<"$desc" -L"$BASHBREW_META_SCRIPTS" --slurp --raw-output '
include "validate";
include "oci";
# TODO technically, this would pass if one file is empty and another file has two documents in it (since it is counting the total), so that is not great, but probably is not a real problem
validate_length(.; $expected | tonumber)
| map(validate_oci_image)
validate_length(.; 2)
| .[0] as $desc
| .[1]
| validate_oci_image({
imageAttestation: IN($desc.annotations["vnd.docker.reference.type"]; "attestation-manifest"),
})
| if $desc then
validate_IN(.mediaType; $desc.mediaType)
| validate_IN(.artifactType; $desc.artifactType)
else . end
| (
(
.[].config, .[].layers[]
| @sh "descriptor \("blobs/\(.digest | sub(":"; "/"))") \(.digest) \(.size)"
# TODO data?
.config, .layers[]
| @sh "descriptor <<<\(tojson) \(.digest | "blobs/\(sub(":"; "/"))")"
),

empty # trailing comma
)
' "$@"
' /dev/stdin "$file"
)"
eval "$shell"
}

# TODO pass descriptor values down so we can validate that they match (.mediaType, .artifactType, .platform across *two* levels index->manifest->config), similar to .data
# TODO disallow urls completely?

indexes() {
echo "index: $*"
index() {
local file="$1"; shift
echo "index: $file"
local desc; desc="$(cat)"
if [ "$desc" != 'null' ]; then
descriptor <<<"$desc" "$file"
fi
local shell
shell="$(
jq -L"$BASHBREW_META_SCRIPTS" --arg expected "$#" --slurp --raw-output '
jq <<<"$desc" -L"$BASHBREW_META_SCRIPTS" --slurp --raw-output '
include "validate";
include "oci";
# TODO technically, this would pass if one file is empty and another file has two documents in it (since it is counting the total), so that is not great, but probably is not a real problem
validate_length(.; $expected | tonumber)
| map(validate_oci_index)
validate_length(.; 2)
| .[0] as $desc
| .[1]
| validate_oci_index({
indexPlatformsOptional: (input_filename == "index.json"),
})
| if $desc then
validate_IN(.mediaType; $desc.mediaType)
| validate_IN(.artifactType; $desc.artifactType)
else . end
| .manifests[]
| (
(
.[].manifests[]
| @sh "descriptor \("blobs/\(.digest | sub(":"; "/"))") \(.digest) \(.size)"
# TODO data?
),

(
[ .[].manifests[] | select(IN(.mediaType; media_types_image)) | .digest ]
| if length > 0 then
"images \(map("blobs/\(sub(":"; "/"))" | @sh) | join(" "))"
else empty end
),

(
[ .[].manifests[] | select(IN(.mediaType; media_types_index)) | .digest ]
| if length > 0 then
"indexes \(map("blobs/\(sub(":"; "/"))" | @sh) | join(" "))"
else empty end
),

empty # trailing comma
)
' "$@"
.mediaType
| if IN(media_types_index) then
"index"
elif IN(media_types_image) then
"image"
else
error("UNSUPPORTED MEDIA TYPE: \(.)")
end
) + @sh " <<<\(tojson) \(.digest | "blobs/\(sub(":"; "/"))")"
' /dev/stdin "$file"
)"
eval "$shell"
}

indexes index.json
index <<<'null' index.json
Loading