diff --git a/.deny.toml b/.deny.toml index 1976f709c9..96b182ac25 100644 --- a/.deny.toml +++ b/.deny.toml @@ -36,14 +36,11 @@ allow = [ "Unicode-3.0", "Zlib", ] +private = { ignore = true } [sources] allow-git = [ # Waiting on releases; used in examples only - "https://github.com/Razaekel/noise-rs", - - "https://github.com/grovesNL/glow", - "https://github.com/gfx-rs/metal-rs", ] unknown-registry = "deny" unknown-git = "deny" diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index d7b9a62cd1..5ebd16d7c2 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -20,6 +20,5 @@ person(s) who reviewed your changes. This will make sure it gets re-added to the - [ ] Run `taplo format`. - [ ] Run `cargo clippy`. If applicable, add: - [ ] `--target wasm32-unknown-unknown` - - [ ] `--target wasm32-unknown-emscripten` - [ ] Run `cargo xtask test` to run tests. - [ ] Add change to `CHANGELOG.md`. See simple instructions inside file. diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2f7a3cbe23..9a932ffcd3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -3,11 +3,11 @@ name: CI on: push: branches-ignore: [ - # We don't need to run on dependabot PRs. - "dependabot/**", - # This is the branch the merge queue creates. - "gh-readonly-queue/**" - ] + # We don't need to run on dependabot PRs. + "dependabot/**", + # This is the branch the merge queue creates. + "gh-readonly-queue/**", + ] tags: [v0.*] pull_request: merge_group: @@ -20,7 +20,7 @@ env: # Sourced from https://vulkan.lunarg.com/sdk/home#linux # # We don't include the 4th version number, as it's not used in any URL. - VULKAN_SDK_VERSION: "1.3.290" + VULKAN_SDK_VERSION: "1.4.304" # Sourced from https://www.nuget.org/packages/Microsoft.Direct3D.WARP WARP_VERSION: "1.0.13" @@ -32,14 +32,10 @@ env: # Sourced from https://archive.mesa3d.org/. Bumping this requires # updating the mesa build in https://github.com/gfx-rs/ci-build and creating a new release. - MESA_VERSION: "24.2.3" + MESA_VERSION: "24.3.4" # Corresponds to https://github.com/gfx-rs/ci-build/releases - CI_BINARY_BUILD: "build19" + CI_BINARY_BUILD: "build20" - # We sometimes need nightly to use special things in CI. - # - # In order to prevent CI regressions, we pin the nightly version. - NIGHTLY_VERSION: "nightly-2024-10-17" # This is the MSRV used by `wgpu` itself and all surrounding infrastructure. REPO_MSRV: "1.83" # This is the MSRV used by the `wgpu-core`, `wgpu-hal`, and `wgpu-types` crates, @@ -59,7 +55,7 @@ env: RUSTFLAGS: -D warnings RUSTDOCFLAGS: -D warnings WASM_BINDGEN_TEST_TIMEOUT: 300 # 5 minutes - CACHE_SUFFIX: c # cache busting + CACHE_SUFFIX: d # cache busting WGPU_TESTING: true # We distinguish the following kinds of builds: @@ -67,7 +63,7 @@ env: # - web: build for the Web # - em: build for the Emscripten -# For build time and size optimization we disable debug +# For build time and size optimization we disable debug symbols # entirely on clippy jobs and reduce it to line-numbers # only for ones where we run tests. 
# @@ -161,7 +157,7 @@ jobs: target: wasm32-unknown-emscripten tier: 2 kind: wgpu-only - + # TODO: Uncomment once web-sys updates past 0.3.76 # See https://github.com/rustwasm/wasm-bindgen/pull/4378 for details # - name: WebAssembly Core 1.0 @@ -169,7 +165,7 @@ # target: wasm32v1-none # tier: 2 # kind: no_std - + # Bare-metal x86-64 # TODO: Remove once web-sys updates past 0.3.76 # Included while wasm32v1-none is failing to ensure `no_std` does not regress @@ -211,13 +207,15 @@ jobs: rustup toolchain install ${{ env.REPO_MSRV }} --no-self-update --profile=minimal --component clippy,rust-src echo "RUSTC_BOOTSTRAP=1" >> "$GITHUB_ENV" - - name: disable debug + - name: disable debug symbols shell: bash run: | mkdir -p .cargo - echo """ - [profile.dev] - debug = false" >> .cargo/config.toml + + cat <<EOF >> .cargo/config.toml + [profile.dev] + debug = false + EOF - name: caching uses: Swatinem/rust-cache@v2 @@ -254,6 +252,9 @@ jobs: cargo clippy --target ${{ matrix.target }} ${{ matrix.extra-flags }} --tests --features glsl,spirv cargo doc --target ${{ matrix.target }} ${{ matrix.extra-flags }} --no-deps --features glsl,spirv + # check with no features + cargo clippy --target ${{ matrix.target }} ${{ matrix.extra-flags }} --no-default-features + # all features cargo clippy --target ${{ matrix.target }} ${{ matrix.extra-flags }} --tests --all-features cargo doc --target ${{ matrix.target }} ${{ matrix.extra-flags }} --no-deps --all-features @@ -273,7 +274,7 @@ jobs: # Check with all features. cargo clippy --target ${{ matrix.target }} ${{ matrix.extra-flags }} -p wgpu-hal --all-features cargo clippy --target ${{ matrix.target }} ${{ matrix.extra-flags }} -p wgpu --all-features - + # Building for no_std platforms where every feature is enabled except "std".
- name: check no_std if: matrix.kind == 'no_std' @@ -356,13 +357,15 @@ jobs: rustup override set ${{ env.CORE_MSRV }} cargo -V - - name: disable debug + - name: disable debug symbols shell: bash run: | mkdir -p .cargo - echo """ - [profile.dev] - debug = false" >> .cargo/config.toml + + cat <<EOF >> .cargo/config.toml + [profile.dev] + debug = false + EOF - name: caching uses: Swatinem/rust-cache@v2 @@ -396,15 +399,15 @@ jobs: - name: Install Nightly Toolchain run: | - rustup toolchain install ${{ env.NIGHTLY_VERSION }} --no-self-update --profile=minimal --component clippy - cargo +${{ env.NIGHTLY_VERSION }} -V + rustup toolchain install ${{ env.REPO_MSRV }} --no-self-update --profile=minimal + cargo +${{ env.REPO_MSRV }} -V - name: Install cargo-hack uses: taiki-e/install-action@v2 with: tool: cargo-hack - - name: disable debug + - name: disable debug symbols shell: bash run: | mkdir -p .cargo @@ -417,7 +420,9 @@ jobs: run: | set -e - cargo +${{ env.NIGHTLY_VERSION }} hack generate-lockfile --remove-dev-deps -Z minimal-versions -p naga -p naga-cli + cargo +${{ env.REPO_MSRV }} hack generate-lockfile --remove-dev-deps -Z minimal-versions -p naga -p naga-cli + env: + RUSTC_BOOTSTRAP: 1 - name: Clippy shell: bash @@ -432,7 +437,6 @@ jobs: name: Test WebAssembly runs-on: ubuntu-latest - needs: [check] steps: - name: checkout repo @@ -472,11 +476,10 @@ jobs: # Linux - name: Linux x86_64 - os: ubuntu-22.04 + os: ubuntu-24.04 name: Test ${{ matrix.name }} runs-on: ${{ matrix.os }} - needs: [check] steps: - name: checkout repo @@ -492,6 +495,16 @@ jobs: with: tool: cargo-nextest,cargo-llvm-cov + - name: debug symbols to line-tables-only + shell: bash + run: | + mkdir -p .cargo + + cat <<EOF >> .cargo/config.toml + [profile.dev] + debug = "line-tables-only" + EOF + # Cache step must go before warp and mesa install on windows as they write into the # target directory, and rust-cache will overwrite the entirety of the target directory.
- name: caching uses: Swatinem/rust-cache@v2 @@ -548,7 +561,7 @@ jobs: echo "GALLIUM_DRIVER=llvmpipe" >> "$GITHUB_ENV" - name: (linux) install vulkan sdk - if: matrix.os == 'ubuntu-22.04' + if: matrix.os == 'ubuntu-24.04' shell: bash run: | set -e @@ -557,13 +570,13 @@ jobs: # vulkan sdk wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add - - sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-$VULKAN_SDK_VERSION-jammy.list https://packages.lunarg.com/vulkan/$VULKAN_SDK_VERSION/lunarg-vulkan-$VULKAN_SDK_VERSION-jammy.list + sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-$VULKAN_SDK_VERSION-noble.list https://packages.lunarg.com/vulkan/$VULKAN_SDK_VERSION/lunarg-vulkan-$VULKAN_SDK_VERSION-noble.list sudo apt-get update sudo apt install -y vulkan-sdk - name: (linux) install mesa - if: matrix.os == 'ubuntu-22.04' + if: matrix.os == 'ubuntu-24.04' shell: bash run: | set -e @@ -589,14 +602,6 @@ jobs: echo "LD_LIBRARY_PATH=$PWD/mesa/lib/x86_64-linux-gnu/:$LD_LIBRARY_PATH" >> "$GITHUB_ENV" echo "LIBGL_DRIVERS_PATH=$PWD/mesa/lib/x86_64-linux-gnu/dri" >> "$GITHUB_ENV" - - name: disable debug - shell: bash - run: | - mkdir -p .cargo - echo """ - [profile.dev] - debug = 1" >> .cargo/config.toml - - name: run wgpu-info shell: bash run: | @@ -659,14 +664,6 @@ jobs: rustup override set ${{ env.REPO_MSRV }} cargo -V - - name: disable debug - shell: bash - run: | - mkdir -p .cargo - echo """ - [profile.dev] - debug = 1" >> .cargo/config.toml - - name: caching uses: Swatinem/rust-cache@v2 with: @@ -710,7 +707,7 @@ jobs: run: taplo format --check --diff - name: Check for typos - uses: crate-ci/typos@v1.29.4 + uses: crate-ci/typos@v1.29.5 check-cts-runner: # runtime is normally 2 minutes @@ -728,13 +725,15 @@ jobs: rustup override set ${{ env.REPO_MSRV }} cargo -V - - name: disable debug + - name: disable debug symbols shell: bash run: | mkdir -p .cargo - echo """ - [profile.dev] - debug = 1" >> .cargo/config.toml + + cat <<EOF >> .cargo/config.toml + [profile.dev] + debug = false + EOF - name: caching uses: Swatinem/rust-cache@v2 @@ -781,3 +780,22 @@ jobs: command: check bans licenses sources arguments: --all-features --workspace rust-version: ${{ env.REPO_MSRV }} + + check-feature-dependencies: + # runtime is normally 1 minute + timeout-minutes: 5 + + name: "Feature Dependencies" + runs-on: ubuntu-latest + steps: + - name: checkout repo + uses: actions/checkout@v4 + + - name: Install Repo MSRV toolchain + run: | + rustup toolchain install ${{ env.REPO_MSRV }} --no-self-update --profile=minimal + rustup override set ${{ env.REPO_MSRV }} + cargo -V + + - name: Run `cargo feature-dependencies` + run: cargo xtask check-feature-dependencies diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 43184ce4ea..7f03970af0 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -9,9 +9,8 @@ on: - trunk env: - # We need to use nightly for various features - # when building docs.rs style docs. - NIGHTLY_VERSION: nightly-2025-01-09 + # This is the MSRV used by `wgpu` itself and all surrounding infrastructure.
+ REPO_MSRV: "1.83" CARGO_INCREMENTAL: false CARGO_TERM_COLOR: always @@ -28,17 +27,31 @@ jobs: persist-credentials: false - name: Install documentation toolchain - run: rustup toolchain install ${{ env.NIGHTLY_VERSION }} --no-self-update --profile=minimal + run: | + rustup toolchain install ${{ env.REPO_MSRV }} --no-self-update --profile=minimal + rustup override set ${{ env.REPO_MSRV }} - - name: Build the docs (nightly) + - name: disable debug symbols + shell: bash run: | - cargo +${{ env.NIGHTLY_VERSION }} doc --no-deps --lib + mkdir -p .cargo + + cat <> .cargo/config.toml + [profile.dev] + debug = false + EOF + + - name: caching + uses: Swatinem/rust-cache@v2 + with: + key: doc-build + + - name: Build the docs + run: | + cargo doc --no-deps --lib --document-private-items env: RUSTDOCFLAGS: --cfg docsrs - - - name: Build the docs (stable) - run: cargo +stable doc --no-deps --lib - if: ${{ failure() }} + RUSTC_BOOTSTRAP: 1 - name: Deploy the docs uses: JamesIves/github-pages-deploy-action@v4.7.2 diff --git a/.github/workflows/generate.yml b/.github/workflows/generate.yml new file mode 100644 index 0000000000..421f9b798a --- /dev/null +++ b/.github/workflows/generate.yml @@ -0,0 +1,77 @@ +name: cargo-generate + +on: + push: + branches: ["*"] + tags: [v0.*] + pull_request: + merge_group: + +env: + # + # Dependency versioning + # + + # This is the MSRV used by `wgpu` itself and all surrounding infrastructure. + REPO_MSRV: "1.83" + RUSTFLAGS: -D warnings + +jobs: + cargo-generate: + timeout-minutes: 5 + + runs-on: ubuntu-latest + + strategy: + fail-fast: false + matrix: + include: + - name: "01-hello-compute" + path: "examples/standalone/01_hello_compute" + - name: "02-hello-window" + path: "examples/standalone/02_hello_window" + + name: "${{ matrix.name }}" + + steps: + - uses: actions/checkout@v4 + + - name: Install Repo MSRV toolchain + run: | + rustup toolchain install ${{ env.REPO_MSRV }} --no-self-update --profile=minimal + rustup override set ${{ env.REPO_MSRV }} + cargo -V + + - name: disable debug symbols + shell: bash + run: | + mkdir -p .cargo + + cat <> .cargo/config.toml + [profile.dev] + debug = false + EOF + + - name: caching + uses: Swatinem/rust-cache@v2 + with: + key: cargo-generate-${{ matrix.name }} + + - name: "Install cargo-generate" + uses: taiki-e/install-action@v2 + with: + tool: cargo-generate + + - name: "Run cargo-generate" + run: | + cd .. 
+ cargo generate --path wgpu --name ${{ matrix.name }} ${{ matrix.path }} + + - name: "Check generated files" + run: | + cd ../${{ matrix.name }}/ + cat <<EOF >> Cargo.toml + [patch.crates-io] + wgpu = { path = "../wgpu/wgpu" } + EOF + cargo check diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 677541af30..c87cbb141e 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -12,7 +12,6 @@ env: CARGO_INCREMENTAL: false CARGO_TERM_COLOR: always RUST_BACKTRACE: full - RUSTFLAGS: jobs: publish: @@ -37,6 +36,21 @@ jobs: - name: Install wasm-bindgen run: cargo +stable install wasm-bindgen-cli --version=$WASM_BINDGEN_VERSION + - name: debug symbols to line-tables-only + shell: bash + run: | + mkdir -p .cargo + + cat <<EOF >> .cargo/config.toml + [profile.dev] + debug = "line-tables-only" + EOF + + - name: caching + uses: Swatinem/rust-cache@v2 + with: + key: publish-build + - name: Build examples run: cargo xtask run-wasm --no-serve diff --git a/.github/workflows/shaders.yml b/.github/workflows/shaders.yml index c85bcae290..ee155ea59f 100644 --- a/.github/workflows/shaders.yml +++ b/.github/workflows/shaders.yml @@ -3,11 +3,11 @@ name: Shaders on: push: branches-ignore: [ - # We don't need to run on dependabot PRs. - "dependabot/**", - # This is the branch the merge queue creates. - "gh-readonly-queue/**" - ] + # We don't need to run on dependabot PRs. + "dependabot/**", + # This is the branch the merge queue creates. + "gh-readonly-queue/**", + ] tags: [v0.*] pull_request: merge_group: @@ -16,9 +16,7 @@ env: # Sourced from https://vulkan.lunarg.com/sdk/home#linux # # We don't include the 4th version number, as it's not used in any URL. - # - # Held back from 1.3.290 by https://github.com/gfx-rs/wgpu/issues/6307 - VULKAN_SDK_VERSION: "1.3.283" + VULKAN_SDK_VERSION: "1.4.304" # Sourced from https://github.com/microsoft/DirectXShaderCompiler/releases # @@ -33,6 +31,16 @@ jobs: steps: - uses: actions/checkout@v4 + - name: debug symbols to line-tables-only + shell: bash + run: | + mkdir -p .cargo + + cat <<EOF >> .cargo/config.toml + [profile.dev] + debug = "line-tables-only" + EOF + - uses: Swatinem/rust-cache@v2 with: workspaces: | @@ -78,6 +86,16 @@ jobs: steps: - uses: actions/checkout@v4 + - name: debug symbols to line-tables-only + shell: bash + run: | + mkdir -p .cargo + + cat <<EOF >> .cargo/config.toml + [profile.dev] + debug = "line-tables-only" + EOF + - uses: Swatinem/rust-cache@v2 with: workspaces: | @@ -89,7 +107,7 @@ jobs: naga-validate-linux: name: "Validate: SPIR-V/GLSL/DOT/WGSL" - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 steps: - uses: actions/checkout@v4 @@ -102,7 +120,7 @@ jobs: # vulkan sdk wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add - - sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-$VULKAN_SDK_VERSION-jammy.list https://packages.lunarg.com/vulkan/$VULKAN_SDK_VERSION/lunarg-vulkan-$VULKAN_SDK_VERSION-jammy.list + sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-$VULKAN_SDK_VERSION-noble.list https://packages.lunarg.com/vulkan/$VULKAN_SDK_VERSION/lunarg-vulkan-$VULKAN_SDK_VERSION-noble.list sudo apt-get update sudo apt install -y vulkan-sdk @@ -110,6 +128,16 @@ jobs: - name: Install graphviz run: sudo apt-get install graphviz + - name: debug symbols to line-tables-only + shell: bash + run: | + mkdir -p .cargo + + cat <<EOF >> .cargo/config.toml + [profile.dev] + debug = "line-tables-only" + EOF + - uses: Swatinem/rust-cache@v2 with: workspaces: | diff --git a/CHANGELOG.md b/CHANGELOG.md index
e6009f3c46..6101c7e5bf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -40,6 +40,69 @@ Bottom level categories: ## Unreleased +### New Features + +#### Naga + +- Support @must_use attribute on function declarations. By @turbocrime in [#6801](https://github.com/gfx-rs/wgpu/pull/6801). + +### Changes + +#### General + +- Avoid using default features in many dependencies, etc. By Brody in [#7031](https://github.com/gfx-rs/wgpu/pull/7031) +- Use `hashbrown` to simplify no-std support. By Brody in [#6938](https://github.com/gfx-rs/wgpu/pull/6938) & [#6925](https://github.com/gfx-rs/wgpu/pull/6925). +- If you use Binding Arrays in a bind group, you may not use Dynamic Offset Buffers or Uniform Buffers in that bind group. By @cwfitzgerald in [#6811](https://github.com/gfx-rs/wgpu/pull/6811) +- Rename `instance_id` and `instance_custom_index` to `instance_index` and `instance_custom_data` by @Vecvec in + [#6780](https://github.com/gfx-rs/wgpu/pull/6780) + +##### Refactored internal trace path parameter + +Refactored some functions to handle the internal trace path as a string to avoid possible issues with `no_std` support. + +By @brodycj in [#6924](https://github.com/gfx-rs/wgpu/pull/6924). + +#### Vulkan + +##### HAL queue callback support + +- Add a way to notify with `Queue::submit()` to Vulkan's `vk::Semaphore` allocated outside of wgpu. By @sotaroikeda in [#6813](https://github.com/gfx-rs/wgpu/pull/6813). + +### Bug Fixes + +#### Naga + +- Fix some instances of functions which have a return type but don't return a value being incorrectly validated. By @jamienicol in [#7013](https://github.com/gfx-rs/wgpu/pull/7013). +- Allow abstract expressions to be used in WGSL function return statements. By @jamienicol in [#7035](https://github.com/gfx-rs/wgpu/pull/7035). + +#### General + +- Avoid overflow in query set bounds check validation. By @ErichDonGubler in [#6933](https://github.com/gfx-rs/wgpu/pull/6933). +- Add Flush to GL Queue::submit. By @cwfitzgerald in [#6941](https://github.com/gfx-rs/wgpu/pull/6941). +- Fix `wgpu` not building with `--no-default-features` on when targeting `wasm32-unknown-unknown`. By @wumpf in [#6946](https://github.com/gfx-rs/wgpu/pull/6946). +- Fix `CopyExternalImageDestInfo` not exported on `wgpu`. By @wumpf in [#6962](https://github.com/gfx-rs/wgpu/pull/6962). +- Fix drop order in `Surface`. By @ed-2100 in [#6997](https://github.com/gfx-rs/wgpu/pull/6997) +- Fix a possible deadlock within `Queue::write_texture`. By @metamuffin in [#7004](https://github.com/gfx-rs/wgpu/pull/7004) + +#### Vulkan + +- Stop naga causing undefined behavior when a ray query misses. By @Vecvec in [#6752](https://github.com/gfx-rs/wgpu/pull/6752). + +#### Dx12 + +- Fix HLSL storage format generation. By @Vecvec in [#6993](https://github.com/gfx-rs/wgpu/pull/6993) +- Fix 3D storage texture bindings. By @SparkyPotato in [#7071](https://github.com/gfx-rs/wgpu/pull/7071) + +#### WebGPU + +- Improve efficiency of dropping read-only buffer mappings. By @kpreid in [#7007](https://github.com/gfx-rs/wgpu/pull/7007). + +### Documentation + +- Improved documentation around pipeline caches and `TextureBlitter`. By @DJMcNab in [#6978](https://github.com/gfx-rs/wgpu/pull/6978) and [#7003](https://github.com/gfx-rs/wgpu/pull/7003). + +- Added a hello window example. By @laycookie in [#6992](https://github.com/gfx-rs/wgpu/pull/6992). 
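The binding-array rule listed in the Unreleased changes above (a bind group containing a binding array may not also contain dynamic-offset buffers or uniform buffers) is easiest to see in layout code. The following is a minimal sketch, not part of the diff: the helper name, stage visibility, and array size are illustrative, and it assumes a device created with the `TEXTURE_BINDING_ARRAY` feature enabled. It simply keeps the binding array and the uniform buffer in two separate bind group layouts.

```rust
use std::num::NonZeroU32;

// Hypothetical helper: split the binding array and the uniform buffer into
// separate bind group layouts, so the binding-array group stays free of
// uniform buffers and dynamic offsets.
fn split_layouts(device: &wgpu::Device) -> (wgpu::BindGroupLayout, wgpu::BindGroupLayout) {
    // Group 0: only the binding array of sampled textures.
    let texture_array_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
        label: Some("binding array layout"),
        entries: &[wgpu::BindGroupLayoutEntry {
            binding: 0,
            visibility: wgpu::ShaderStages::FRAGMENT,
            ty: wgpu::BindingType::Texture {
                sample_type: wgpu::TextureSampleType::Float { filterable: true },
                view_dimension: wgpu::TextureViewDimension::D2,
                multisampled: false,
            },
            // `Some(..)` turns this entry into a binding array.
            count: NonZeroU32::new(16),
        }],
    });

    // Group 1: the uniform buffer lives in its own bind group.
    let uniform_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
        label: Some("uniform layout"),
        entries: &[wgpu::BindGroupLayoutEntry {
            binding: 0,
            visibility: wgpu::ShaderStages::FRAGMENT,
            ty: wgpu::BindingType::Buffer {
                ty: wgpu::BufferBindingType::Uniform,
                has_dynamic_offset: false,
                min_binding_size: None,
            },
            count: None,
        }],
    });

    (texture_array_layout, uniform_layout)
}
```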
+ ## v24.0.0 (2025-01-15) ### Major changes @@ -50,6 +113,17 @@ The crate `wgpu` has two different "backends", one which targets webgpu in the b By @cwfitzgerald in [#6619](https://github.com/gfx-rs/wgpu/pull/6619). +#### Most objects in `wgpu` are now `Clone` + +All types in the `wgpu` API are now `Clone`. +This is implemented with internal reference counting, so cloning, for instance, a `Buffer` copies only the "handle" of the GPU buffer, not the underlying resource. + +Previously, libraries using `wgpu` objects like `Device`, `Buffer` or `Texture` etc. often had to manually wrap them in an `Arc` to allow passing between libraries. +This caused a lot of friction, since if one library wanted to use a `Buffer` by value, calling code had to give up ownership of the resource, which may interfere with other subsystems. +Note that this also mimics how the WebGPU JavaScript API works, where objects can be cloned and moved around freely. + +By @cwfitzgerald in [#6665](https://github.com/gfx-rs/wgpu/pull/6665). + #### Render and Compute Passes Now Properly Enforce Their Lifetime A regression introduced in 23.0.0 caused lifetimes of render and compute passes to be incorrectly enforced. While this is not @@ -212,6 +286,7 @@ By @ErichDonGubler in [#6456](https://github.com/gfx-rs/wgpu/pull/6456), [#6148] - Add support for OpAtomicCompareExchange in SPIR-V frontend. By @schell in [#6590](https://github.com/gfx-rs/wgpu/pull/6590). - Implement type inference for abstract arguments to user-defined functions. By @jamienicol in [#6577](https://github.com/gfx-rs/wgpu/pull/6577). - Allow for override-expressions in array sizes. By @KentSlaney in [#6654](https://github.com/gfx-rs/wgpu/pull/6654). +- [`pointer_composite_access` WGSL language extension](https://www.w3.org/TR/WGSL/#language_extension-pointer_composite_access) is implemented. By @sagudev in [#6913](https://github.com/gfx-rs/wgpu/pull/6913) ##### General @@ -225,6 +300,7 @@ By @ErichDonGubler in [#6456](https://github.com/gfx-rs/wgpu/pull/6456), [#6148] - Image atomic support in shaders. By @atlv24 in [#6706](https://github.com/gfx-rs/wgpu/pull/6706) - 64 bit image atomic support in shaders. By @atlv24 in [#5537](https://github.com/gfx-rs/wgpu/pull/5537) - Add `no_std` support to `wgpu-types`. By @bushrat011899 in [#6892](https://github.com/gfx-rs/wgpu/pull/6892). +- Added `CommandEncoder::transition_resources()` for native API interop and to allow users to slightly optimize barriers. By @JMS55 in [#6678](https://github.com/gfx-rs/wgpu/pull/6678). ##### Vulkan @@ -306,6 +382,7 @@ By @ErichDonGubler in [#6456](https://github.com/gfx-rs/wgpu/pull/6456), [#6148] - In validation, forbid cycles between global expressions and types. By @jimblandy in [#6800](https://github.com/gfx-rs/wgpu/pull/6800) - Allow abstract scalars in modf and frexp results. By @jimblandy in [#6821](https://github.com/gfx-rs/wgpu/pull/6821) - In the WGSL front end, apply automatic conversions to values being assigned. By @jimblandy in [#6822](https://github.com/gfx-rs/wgpu/pull/6822) +- Fix a leak by ensuring that types that depend on expressions are correctly compacted. By @KentSlaney in [#6934](https://github.com/gfx-rs/wgpu/pull/6934). #### Vulkan @@ -531,7 +608,7 @@ By @MarijnS95 in [#6006](https://github.com/gfx-rs/wgpu/pull/6006). - When mapping buffers for reading, mark buffers as initialized only when they have `MAP_WRITE` usage. By @teoxoy in [#6178](https://github.com/gfx-rs/wgpu/pull/6178). - Add a separate pipeline constants error.
By @teoxoy in [#6094](https://github.com/gfx-rs/wgpu/pull/6094). - Ensure safety of indirect dispatch by injecting a compute shader that validates the content of the indirect buffer. By @teoxoy in [#5714](https://github.com/gfx-rs/wgpu/pull/5714). -- Add conversions between `TextureFormat` and ` StorageFormat`. By @caelunshun in [#6185](https://github.com/gfx-rs/wgpu/pull/6185) +- Add conversions between `TextureFormat` and `StorageFormat`. By @caelunshun in [#6185](https://github.com/gfx-rs/wgpu/pull/6185) #### GLES / OpenGL @@ -619,7 +696,7 @@ This release includes `wgpu`, `wgpu-core` and `naga`. All other crates remain at ### Our first major version release! -For the first time ever, WGPU is being released with a major version (i.e., 22.* instead of 0.22.*)! Maintainership has decided to fully adhere to [Semantic Versioning](https://semver.org/)'s recommendations for versioning production software. According to [SemVer 2.0.0's Q&A about when to use 1.0.0 versions (and beyond)](https://semver.org/spec/v2.0.0.html#how-do-i-know-when-to-release-100): +For the first time ever, WGPU is being released with a major version (i.e., 22.\* instead of 0.22.\*)! Maintainership has decided to fully adhere to [Semantic Versioning](https://semver.org/)'s recommendations for versioning production software. According to [SemVer 2.0.0's Q&A about when to use 1.0.0 versions (and beyond)](https://semver.org/spec/v2.0.0.html#how-do-i-know-when-to-release-100): > ### How do I know when to release 1.0.0? > @@ -860,7 +937,7 @@ This release fixes the validation errors whenever a surface is used with the vul #### GLES / OpenGL -- Fix regression on OpenGL (EGL) where non-sRGB still used sRGB [#5642](https://github.com/gfx-rs/wgpu/pull/5642) +- Fix regression on OpenGL (EGL) where non-sRGB still used sRGB [#5642](https://github.com/gfx-rs/wgpu/pull/5642) #### Naga diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index eacf57549e..e1fc71df35 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -44,8 +44,10 @@ The WGPU project has multiple official platforms for community engagement: - `wgpu` Maintainership Meetings: Every week, the maintainership of the wgpu project meets to discuss the project's direction and review ongoing work. These meetings are open to the public, and you are welcome to attend. They - happen on Google Meet and happen on Wednesday at 16:00 UTC and last approximately - an hour. Remember to obey the [`CODE_OF_CONDUCT.md`] in the meeting. + happen on Google Meet and happen on Wednesday at 11:00 US Eastern Standard + Time and last approximately an hour. Remember to obey the + [`CODE_OF_CONDUCT.md`] in the meeting. 
+ - [Meeting Notes] - [Meeting Link] - [GitHub discussions]: TODO: Experimentally used by some enthusiastic members diff --git a/Cargo.lock b/Cargo.lock index 03229043af..dfa19c3bd7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -61,6 +61,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "allocator-api2" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" + [[package]] name = "android-activity" version = "0.5.2" @@ -75,9 +81,30 @@ dependencies = [ "jni-sys", "libc", "log", - "ndk", + "ndk 0.8.0", "ndk-context", - "ndk-sys", + "ndk-sys 0.5.0+25.2.9519653", + "num_enum", + "thiserror 1.0.69", +] + +[[package]] +name = "android-activity" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef6978589202a00cd7e118380c448a08b6ed394c3a8df3a430d0898e3a42d046" +dependencies = [ + "android-properties", + "bitflags 2.8.0", + "cc", + "cesu8", + "jni", + "jni-sys", + "libc", + "log", + "ndk 0.9.0", + "ndk-context", + "ndk-sys 0.6.0+11769913", "num_enum", "thiserror 1.0.69", ] @@ -159,6 +186,15 @@ version = "1.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34ac096ce696dc2fcabef30516bb13c0a68a11d30131d3df6f04711467681b04" +[[package]] +name = "approx" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cab112f0a86d568ea0e627cc1d6be74a1e9cd55214684db5561995f6dad897c6" +dependencies = [ + "num-traits", +] + [[package]] name = "arbitrary" version = "1.4.1" @@ -232,9 +268,9 @@ dependencies = [ [[package]] name = "async-trait" -version = "0.1.85" +version = "0.1.86" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f934833b4b7233644e5848f235df3f57ed8c80f1528a26c3dfa13d2147fa056" +checksum = "644dd749086bf3771a2fbc5f256fdb982d53f011c7d5d560304eafeecebce79d" dependencies = [ "proc-macro2", "quote", @@ -402,14 +438,23 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "15b55663a85f33501257357e6421bb33e769d5c9ffb5ba0921c975a123e35e68" dependencies = [ "block-sys", - "objc2", + "objc2 0.4.1", +] + +[[package]] +name = "block2" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c132eebf10f5cad5289222520a4a058514204aed6d791f1cf4fe8088b82d15f" +dependencies = [ + "objc2 0.5.2", ] [[package]] name = "bumpalo" -version = "3.16.0" +version = "3.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" +checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf" [[package]] name = "bytemuck" @@ -457,13 +502,39 @@ dependencies = [ "thiserror 1.0.69", ] +[[package]] +name = "calloop" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b99da2f8558ca23c71f4fd15dc57c906239752dd27ff3c00a1d56b685b7cbfec" +dependencies = [ + "bitflags 2.8.0", + "log", + "polling", + "rustix", + "slab", + "thiserror 1.0.69", +] + [[package]] name = "calloop-wayland-source" version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0f0ea9b9476c7fad82841a8dbb380e2eae480c21910feba80725b46931ed8f02" dependencies = [ - "calloop", + "calloop 0.12.4", + "rustix", + "wayland-backend", + "wayland-client", +] + +[[package]] +name = "calloop-wayland-source" +version = "0.3.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "95a66a987056935f7efce4ab5668920b5d0dac4a7c99991a67395f13702ddd20" +dependencies = [ + "calloop 0.13.0", "rustix", "wayland-backend", "wayland-client", @@ -477,9 +548,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.2.9" +version = "1.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8293772165d9345bdaaa39b45b2109591e63fe5e6fbc23c6ff930a048aa310b" +checksum = "e4730490333d58093109dc02c23174c3f4d490998c3fed3cc8e82d57afedb9cf" dependencies = [ "jobserver", "libc", @@ -568,9 +639,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.26" +version = "4.5.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8eb5e908ef3a6efbe1ed62520fb7287959888c88485abe072543190ecc66783" +checksum = "769b0145982b4b48713e01ec42d61614425f27b7058bda7180a3a41f30104796" dependencies = [ "clap_builder", "clap_derive", @@ -578,9 +649,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.26" +version = "4.5.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96b01801b5fc6a0a232407abc821660c9c6d25a1cafc0d4f85f29fb8d9afc121" +checksum = "1b26884eb4b57140e4d2d93652abfa49498b938b3c9179f9fc487b0acc3edad7" dependencies = [ "anstream", "anstyle", @@ -791,9 +862,9 @@ checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" [[package]] name = "crunchy" -version = "0.2.2" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" +checksum = "43da5946c66ffcc7745f48db692ffbb10a83bfe0afd96235c5c2a4fb23994929" [[package]] name = "ctor" @@ -994,6 +1065,7 @@ name = "deno_webgpu" version = "0.146.0" dependencies = [ "deno_core", + "hashbrown", "raw-window-handle 0.6.2", "serde", "thiserror 2.0.11", @@ -1069,6 +1141,12 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "75b325c5dbd37f80359721ad39aca5a29fb04c89279657cffdda8736d0c0b9d2" +[[package]] +name = "dpi" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f25c0e292a7ca6d6498557ff1df68f32c99850012b6ea401cf8daf771f22ff53" + [[package]] name = "either" version = "1.13.0" @@ -1152,7 +1230,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d" dependencies = [ "libc", - "windows-sys 0.59.0", + "windows-sys 0.52.0", ] [[package]] @@ -1219,12 +1297,6 @@ dependencies = [ "spin", ] -[[package]] -name = "foldhash" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0d2fde1f7b3d48b8395d5f2de76c18a528bd6a9cdde438df747bfcba3e05d6f" - [[package]] name = "foreign-types" version = "0.5.0" @@ -1480,7 +1552,7 @@ dependencies = [ "glutin_wgl_sys 0.5.0", "icrate", "libloading", - "objc2", + "objc2 0.4.1", "once_cell", "raw-window-handle 0.5.2", "wayland-sys", @@ -1497,7 +1569,7 @@ dependencies = [ "cfg_aliases 0.1.1", "glutin", "raw-window-handle 0.5.2", - "winit", + "winit 0.29.15", ] [[package]] @@ -1561,9 +1633,9 @@ dependencies = [ [[package]] name = "gpu-descriptor" -version = "0.3.1" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dcf29e94d6d243368b7a56caa16bc213e4f9f8ed38c4d9557069527b5d5281ca" +checksum = 
"9c08c1f623a8d0b722b8b99f821eb0ba672a1618f0d3b16ddbee1cedd2dd8557" dependencies = [ "bitflags 2.8.0", "gpu-descriptor-types", @@ -1600,11 +1672,13 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.15.2" +version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" dependencies = [ - "foldhash", + "ahash", + "allocator-api2", + "serde", ] [[package]] @@ -1660,9 +1734,9 @@ version = "0.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "99d3aaff8a54577104bafdf686ff18565c3b6903ca5782a2026ef06e2c7aa319" dependencies = [ - "block2", + "block2 0.3.0", "dispatch", - "objc2", + "objc2 0.4.1", ] [[package]] @@ -1825,9 +1899,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.7.0" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62f822373a4fe84d4bb149bf54e584a7f4abec90e072ed49cda0edea5b95471f" +checksum = "68b900aa2f7301e21c36462b170ee99994de34dff39a4a6a528e80e7376d07e5" dependencies = [ "arbitrary", "equivalent", @@ -1837,9 +1911,9 @@ dependencies = [ [[package]] name = "is-terminal" -version = "0.4.13" +version = "0.4.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "261f68e344040fbd0edea105bef17c66edf46f984ddb1115b775ce31be948f4b" +checksum = "e19b23d53f35ce9f56aebc7d1bb4e6ac1e9c0db7ac85c8d1760c04379edced37" dependencies = [ "hermit-abi", "libc", @@ -1973,7 +2047,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc2f4eb4bc735547cfed7c0a4922cbd04a4655978c09b54f1f7b228750664c34" dependencies = [ "cfg-if", - "windows-targets 0.52.6", + "windows-targets 0.48.5", ] [[package]] @@ -2176,6 +2250,7 @@ dependencies = [ "codespan-reporting", "diff", "env_logger", + "hashbrown", "hexf-parse", "hlsl-snapshots", "indexmap", @@ -2250,13 +2325,28 @@ dependencies = [ "bitflags 2.8.0", "jni-sys", "log", - "ndk-sys", + "ndk-sys 0.5.0+25.2.9519653", "num_enum", "raw-window-handle 0.5.2", "raw-window-handle 0.6.2", "thiserror 1.0.69", ] +[[package]] +name = "ndk" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3f42e7bbe13d351b6bead8286a43aac9534b82bd3cc43e47037f012ebfd62d4" +dependencies = [ + "bitflags 2.8.0", + "jni-sys", + "log", + "ndk-sys 0.6.0+11769913", + "num_enum", + "raw-window-handle 0.6.2", + "thiserror 1.0.69", +] + [[package]] name = "ndk-context" version = "0.1.1" @@ -2272,10 +2362,20 @@ dependencies = [ "jni-sys", ] +[[package]] +name = "ndk-sys" +version = "0.6.0+11769913" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee6cda3051665f1fb8d9e08fc35c96d5a244fb1be711a03b71118828afc9a873" +dependencies = [ + "jni-sys", +] + [[package]] name = "noise" -version = "0.8.2" -source = "git+https://github.com/Razaekel/noise-rs.git?rev=c6942d4fb70af26db4441edcf41f90fa115333f2#c6942d4fb70af26db4441edcf41f90fa115333f2" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6da45c8333f2e152fc665d78a380be060eb84fad8ca4c9f7ac8ca29216cff0cc" dependencies = [ "num-traits", "rand", @@ -2398,7 +2498,93 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "559c5a40fdd30eb5e344fbceacf7595a81e242529fb4e21cf5f43fb4f11ff98d" dependencies = [ "objc-sys", - "objc2-encode", + "objc2-encode 3.0.0", +] + +[[package]] +name = "objc2" 
+version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46a785d4eeff09c14c487497c162e92766fbb3e4059a71840cecc03d9a50b804" +dependencies = [ + "objc-sys", + "objc2-encode 4.1.0", +] + +[[package]] +name = "objc2-app-kit" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4e89ad9e3d7d297152b17d39ed92cd50ca8063a89a9fa569046d41568891eff" +dependencies = [ + "bitflags 2.8.0", + "block2 0.5.1", + "libc", + "objc2 0.5.2", + "objc2-core-data", + "objc2-core-image", + "objc2-foundation", + "objc2-quartz-core", +] + +[[package]] +name = "objc2-cloud-kit" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74dd3b56391c7a0596a295029734d3c1c5e7e510a4cb30245f8221ccea96b009" +dependencies = [ + "bitflags 2.8.0", + "block2 0.5.1", + "objc2 0.5.2", + "objc2-core-location", + "objc2-foundation", +] + +[[package]] +name = "objc2-contacts" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5ff520e9c33812fd374d8deecef01d4a840e7b41862d849513de77e44aa4889" +dependencies = [ + "block2 0.5.1", + "objc2 0.5.2", + "objc2-foundation", +] + +[[package]] +name = "objc2-core-data" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "617fbf49e071c178c0b24c080767db52958f716d9eabdf0890523aeae54773ef" +dependencies = [ + "bitflags 2.8.0", + "block2 0.5.1", + "objc2 0.5.2", + "objc2-foundation", +] + +[[package]] +name = "objc2-core-image" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55260963a527c99f1819c4f8e3b47fe04f9650694ef348ffd2227e8196d34c80" +dependencies = [ + "block2 0.5.1", + "objc2 0.5.2", + "objc2-foundation", + "objc2-metal", +] + +[[package]] +name = "objc2-core-location" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "000cfee34e683244f284252ee206a27953279d370e309649dc3ee317b37e5781" +dependencies = [ + "block2 0.5.1", + "objc2 0.5.2", + "objc2-contacts", + "objc2-foundation", ] [[package]] @@ -2407,6 +2593,117 @@ version = "3.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d079845b37af429bfe5dfa76e6d087d788031045b25cfc6fd898486fd9847666" +[[package]] +name = "objc2-encode" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef25abbcd74fb2609453eb695bd2f860d389e457f67dc17cafc8b8cbc89d0c33" + +[[package]] +name = "objc2-foundation" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ee638a5da3799329310ad4cfa62fbf045d5f56e3ef5ba4149e7452dcf89d5a8" +dependencies = [ + "bitflags 2.8.0", + "block2 0.5.1", + "dispatch", + "libc", + "objc2 0.5.2", +] + +[[package]] +name = "objc2-link-presentation" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1a1ae721c5e35be65f01a03b6d2ac13a54cb4fa70d8a5da293d7b0020261398" +dependencies = [ + "block2 0.5.1", + "objc2 0.5.2", + "objc2-app-kit", + "objc2-foundation", +] + +[[package]] +name = "objc2-metal" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd0cba1276f6023976a406a14ffa85e1fdd19df6b0f737b063b95f6c8c7aadd6" +dependencies = [ + "bitflags 2.8.0", + "block2 0.5.1", + "objc2 0.5.2", + "objc2-foundation", +] + +[[package]] +name = "objc2-quartz-core" +version = "0.2.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "e42bee7bff906b14b167da2bac5efe6b6a07e6f7c0a21a7308d40c960242dc7a" +dependencies = [ + "bitflags 2.8.0", + "block2 0.5.1", + "objc2 0.5.2", + "objc2-foundation", + "objc2-metal", +] + +[[package]] +name = "objc2-symbols" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a684efe3dec1b305badae1a28f6555f6ddd3bb2c2267896782858d5a78404dc" +dependencies = [ + "objc2 0.5.2", + "objc2-foundation", +] + +[[package]] +name = "objc2-ui-kit" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8bb46798b20cd6b91cbd113524c490f1686f4c4e8f49502431415f3512e2b6f" +dependencies = [ + "bitflags 2.8.0", + "block2 0.5.1", + "objc2 0.5.2", + "objc2-cloud-kit", + "objc2-core-data", + "objc2-core-image", + "objc2-core-location", + "objc2-foundation", + "objc2-link-presentation", + "objc2-quartz-core", + "objc2-symbols", + "objc2-uniform-type-identifiers", + "objc2-user-notifications", +] + +[[package]] +name = "objc2-uniform-type-identifiers" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44fa5f9748dbfe1ca6c0b79ad20725a11eca7c2218bceb4b005cb1be26273bfe" +dependencies = [ + "block2 0.5.1", + "objc2 0.5.2", + "objc2-foundation", +] + +[[package]] +name = "objc2-user-notifications" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76cfcbf642358e8689af64cee815d139339f3ed8ad05103ed5eaf73db8d84cb3" +dependencies = [ + "bitflags 2.8.0", + "block2 0.5.1", + "objc2 0.5.2", + "objc2-core-location", + "objc2-foundation", +] + [[package]] name = "object" version = "0.36.7" @@ -2592,7 +2889,7 @@ dependencies = [ "serde", "wgpu-core", "wgpu-types", - "winit", + "winit 0.29.15", ] [[package]] @@ -2731,9 +3028,9 @@ checksum = "afbdc74edc00b6f6a218ca6a5364d6226a259d4b8ea1af4a0ea063f27e179f4d" [[package]] name = "quick-xml" -version = "0.36.2" +version = "0.37.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7649a7b4df05aed9ea7ec6f628c67c9953a43869b8bc50929569b2999d443fe" +checksum = "165859e9e55f79d67b96c5d96f4e88b6f2695a1972849c15a6a3f5c59fc2c003" dependencies = [ "memchr", ] @@ -2779,9 +3076,9 @@ dependencies = [ [[package]] name = "range-alloc" -version = "0.1.3" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c8a99fddc9f0ba0a85884b8d14e3592853e787d581ca1816c91349b10e4eeab" +checksum = "c3d6831663a5098ea164f89cff59c6284e95f4e3c76ce9848d4529f5ccca9bde" [[package]] name = "raw-window-handle" @@ -2824,6 +3121,15 @@ dependencies = [ "bitflags 1.3.2", ] +[[package]] +name = "redox_syscall" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" +dependencies = [ + "bitflags 1.3.2", +] + [[package]] name = "redox_syscall" version = "0.5.8" @@ -2933,15 +3239,15 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.43" +version = "0.38.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a78891ee6bf2340288408954ac787aa063d8e8817e9f53abb37c695c6d834ef6" +checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" dependencies = [ "bitflags 2.8.0", "errno", "libc", "linux-raw-sys", - "windows-sys 0.59.0", + "windows-sys 0.52.0", ] [[package]] @@ -2952,9 +3258,9 @@ checksum = 
"f7c45b9784283f1b2e7fb61b42047c2fd678ef0960d4f6f1eba131594cc369d4" [[package]] name = "ryu" -version = "1.0.18" +version = "1.0.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" +checksum = "6ea1a2d0a644769cc99faa24c3ad26b379b786fe7c36fd3c546254801650e6dd" [[package]] name = "same-file" @@ -2986,7 +3292,20 @@ dependencies = [ "ab_glyph", "log", "memmap2", - "smithay-client-toolkit", + "smithay-client-toolkit 0.18.1", + "tiny-skia", +] + +[[package]] +name = "sctk-adwaita" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6277f0217056f77f1d8f49f2950ac6c278c0d607c45f5ee99328d792ede24ec" +dependencies = [ + "ab_glyph", + "log", + "memmap2", + "smithay-client-toolkit 0.19.2", "tiny-skia", ] @@ -3027,9 +3346,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.135" +version = "1.0.138" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b0d7ba2887406110130a978386c4e1befb98c674b4fba677954e4db976630d9" +checksum = "d434192e7da787e94a6ea7e9670b26a036d0ca41e0b7efb2676dd32bae872949" dependencies = [ "indexmap", "itoa", @@ -3130,8 +3449,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "922fd3eeab3bd820d76537ce8f582b1cf951eceb5475c28500c7457d9d17f53a" dependencies = [ "bitflags 2.8.0", - "calloop", - "calloop-wayland-source", + "calloop 0.12.4", + "calloop-wayland-source 0.2.0", "cursor-icon", "libc", "log", @@ -3142,8 +3461,33 @@ dependencies = [ "wayland-client", "wayland-csd-frame", "wayland-cursor", - "wayland-protocols", - "wayland-protocols-wlr", + "wayland-protocols 0.31.2", + "wayland-protocols-wlr 0.2.0", + "wayland-scanner", + "xkeysym", +] + +[[package]] +name = "smithay-client-toolkit" +version = "0.19.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3457dea1f0eb631b4034d61d4d8c32074caa6cd1ab2d59f2327bd8461e2c0016" +dependencies = [ + "bitflags 2.8.0", + "calloop 0.13.0", + "calloop-wayland-source 0.3.0", + "cursor-icon", + "libc", + "log", + "memmap2", + "rustix", + "thiserror 1.0.69", + "wayland-backend", + "wayland-client", + "wayland-csd-frame", + "wayland-cursor", + "wayland-protocols 0.32.5", + "wayland-protocols-wlr 0.3.5", "wayland-scanner", "xkeysym", ] @@ -3289,9 +3633,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.96" +version = "2.0.98" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d5d0adab1ae378d7f53bdebc67a39f1f151407ef230f0ce2883572f5d8985c80" +checksum = "36147f1a48ae0ec2b5b3bc5b537d267457555a10dc06f3dbc8cb11ba3006d3b1" dependencies = [ "proc-macro2", "quote", @@ -3487,9 +3831,9 @@ dependencies = [ [[package]] name = "toml_edit" -version = "0.22.22" +version = "0.22.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ae48d6208a266e853d946088ed816055e556cc6028c5e8e2b84d9fa5dd7c7f5" +checksum = "02a8b472d1a3d7c18e2d61a489aee3453fd9031c33e4f55bd533f4a7adca1bee" dependencies = [ "indexmap", "serde", @@ -3565,14 +3909,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69fff37da548239c3bf9e64a12193d261e8b22b660991c6fd2df057c168f435f" dependencies = [ "cc", - "windows-targets 0.52.6", + "windows-targets 0.48.5", ] [[package]] name = "trybuild" -version = "1.0.101" +version = "1.0.103" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"8dcd332a5496c026f1e14b7f3d2b7bd98e509660c04239c58b0ba38a12daded4" +checksum = "b812699e0c4f813b872b373a4471717d9eb550da14b311058a4d9cf4173cbca6" dependencies = [ "glob", "serde", @@ -3638,9 +3982,9 @@ checksum = "2f322b60f6b9736017344fa0635d64be2f458fbc04eef65f6be22976dd1ffd5b" [[package]] name = "unicode-ident" -version = "1.0.14" +version = "1.0.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83" +checksum = "a210d160f08b701c8721ba1c726c11662f877ea6b7094007e1ca9a1041945034" [[package]] name = "unicode-segmentation" @@ -3704,9 +4048,9 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "uuid" -version = "1.12.0" +version = "1.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "744018581f9a3454a9e15beb8a33b017183f1e7c0cd170232a2d1453b23a51c4" +checksum = "b3758f5e68192bb96cc8f9b7e2c2cfdabb435499a28499a42f8f984092adad4b" dependencies = [ "getrandom", "serde", @@ -3731,9 +4075,9 @@ dependencies = [ [[package]] name = "valuable" -version = "0.1.0" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" +checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" [[package]] name = "version_check" @@ -3869,9 +4213,9 @@ dependencies = [ [[package]] name = "wayland-backend" -version = "0.3.7" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "056535ced7a150d45159d3a8dc30f91a2e2d588ca0b23f70e56033622b8016f6" +checksum = "b7208998eaa3870dad37ec8836979581506e0c5c64c20c9e79e9d2a10d6f47bf" dependencies = [ "cc", "downcast-rs", @@ -3927,6 +4271,18 @@ dependencies = [ "wayland-scanner", ] +[[package]] +name = "wayland-protocols" +version = "0.32.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7cd0ade57c4e6e9a8952741325c30bf82f4246885dca8bf561898b86d0c1f58e" +dependencies = [ + "bitflags 2.8.0", + "wayland-backend", + "wayland-client", + "wayland-scanner", +] + [[package]] name = "wayland-protocols-plasma" version = "0.2.0" @@ -3936,7 +4292,20 @@ dependencies = [ "bitflags 2.8.0", "wayland-backend", "wayland-client", - "wayland-protocols", + "wayland-protocols 0.31.2", + "wayland-scanner", +] + +[[package]] +name = "wayland-protocols-plasma" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b31cab548ee68c7eb155517f2212049dc151f7cd7910c2b66abfd31c3ee12bd" +dependencies = [ + "bitflags 2.8.0", + "wayland-backend", + "wayland-client", + "wayland-protocols 0.32.5", "wayland-scanner", ] @@ -3949,15 +4318,28 @@ dependencies = [ "bitflags 2.8.0", "wayland-backend", "wayland-client", - "wayland-protocols", + "wayland-protocols 0.31.2", + "wayland-scanner", +] + +[[package]] +name = "wayland-protocols-wlr" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "782e12f6cd923c3c316130d56205ebab53f55d6666b7faddfad36cecaeeb4022" +dependencies = [ + "bitflags 2.8.0", + "wayland-backend", + "wayland-client", + "wayland-protocols 0.32.5", "wayland-scanner", ] [[package]] name = "wayland-scanner" -version = "0.31.5" +version = "0.31.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "597f2001b2e5fc1121e3d5b9791d3e78f05ba6bfa4641053846248e3a13661c3" +checksum = "896fdafd5d28145fce7958917d69f2fd44469b1d4e861cb5961bcbeebc6d1484" 
dependencies = [ "proc-macro2", "quick-xml", @@ -3966,9 +4348,9 @@ dependencies = [ [[package]] name = "wayland-sys" -version = "0.31.5" +version = "0.31.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "efa8ac0d8e8ed3e3b5c9fc92c7881406a268e11555abe36493efabe649a29e09" +checksum = "dbcebb399c77d5aa9fa5db874806ee7b4eba4e73650948e8f93963f128896615" dependencies = [ "dlib", "log", @@ -3996,6 +4378,16 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "web-time" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + [[package]] name = "wgpu" version = "24.0.0" @@ -4004,6 +4396,7 @@ dependencies = [ "bitflags 2.8.0", "cfg_aliases 0.2.1", "document-features", + "hashbrown", "js-sys", "log", "naga", @@ -4047,6 +4440,7 @@ dependencies = [ "bytemuck", "cfg_aliases 0.2.1", "document-features", + "hashbrown", "indexmap", "log", "naga", @@ -4063,6 +4457,26 @@ dependencies = [ "wgpu-types", ] +[[package]] +name = "wgpu-example-01-hello-compute" +version = "0.0.0" +dependencies = [ + "bytemuck", + "env_logger", + "pollster", + "wgpu", +] + +[[package]] +name = "wgpu-example-02-hello-window" +version = "0.0.0" +dependencies = [ + "env_logger", + "pollster", + "wgpu", + "winit 0.30.8", +] + [[package]] name = "wgpu-examples" version = "24.0.0" @@ -4088,10 +4502,10 @@ dependencies = [ "wasm-bindgen-futures", "wasm-bindgen-test", "web-sys", - "web-time", + "web-time 1.1.0", "wgpu", "wgpu-test", - "winit", + "winit 0.29.15", ] [[package]] @@ -4117,6 +4531,7 @@ dependencies = [ "gpu-alloc", "gpu-allocator", "gpu-descriptor", + "hashbrown", "js-sys", "khronos-egl", "libc", @@ -4125,7 +4540,7 @@ dependencies = [ "mach-dxcompiler-rs", "metal", "naga", - "ndk-sys", + "ndk-sys 0.5.0+25.2.9519653", "objc", "once_cell", "ordered-float", @@ -4143,7 +4558,7 @@ dependencies = [ "wgpu-types", "windows", "windows-core", - "winit", + "winit 0.29.15", ] [[package]] @@ -4153,6 +4568,7 @@ dependencies = [ "anyhow", "bitflags 2.8.0", "env_logger", + "hashbrown", "pico-args", "serde", "serde_json", @@ -4173,6 +4589,7 @@ name = "wgpu-test" version = "24.0.0" dependencies = [ "anyhow", + "approx", "arrayvec", "bitflags 2.8.0", "bytemuck", @@ -4202,7 +4619,6 @@ dependencies = [ "web-sys", "wgpu", "wgpu-macros", - "wgpu-types", ] [[package]] @@ -4263,7 +4679,7 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.48.0", ] [[package]] @@ -4557,11 +4973,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0d59ad965a635657faf09c8f062badd885748428933dad8e8bdd64064d92e5ca" dependencies = [ "ahash", - "android-activity", + "android-activity 0.5.2", "atomic-waker", "bitflags 2.8.0", "bytemuck", - "calloop", + "calloop 0.12.4", "cfg_aliases 0.1.1", "core-foundation", "core-graphics", @@ -4571,9 +4987,9 @@ dependencies = [ "libc", "log", "memmap2", - "ndk", - "ndk-sys", - "objc2", + "ndk 0.8.0", + "ndk-sys 0.5.0+25.2.9519653", + "objc2 0.4.1", "once_cell", "orbclient", "percent-encoding", @@ -4581,29 +4997,81 @@ dependencies = [ "raw-window-handle 0.6.2", "redox_syscall 0.3.5", "rustix", - "sctk-adwaita", - "smithay-client-toolkit", + "sctk-adwaita 0.8.3", + "smithay-client-toolkit 0.18.1", "smol_str", "unicode-segmentation", "wasm-bindgen", 
"wasm-bindgen-futures", "wayland-backend", "wayland-client", - "wayland-protocols", - "wayland-protocols-plasma", + "wayland-protocols 0.31.2", + "wayland-protocols-plasma 0.2.0", "web-sys", - "web-time", + "web-time 0.2.4", "windows-sys 0.48.0", "x11-dl", "x11rb", "xkbcommon-dl", ] +[[package]] +name = "winit" +version = "0.30.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f5d74280aabb958072864bff6cfbcf9025cf8bfacdde5e32b5e12920ef703b0f" +dependencies = [ + "ahash", + "android-activity 0.6.0", + "atomic-waker", + "bitflags 2.8.0", + "block2 0.5.1", + "bytemuck", + "calloop 0.13.0", + "cfg_aliases 0.2.1", + "concurrent-queue", + "core-foundation", + "core-graphics", + "cursor-icon", + "dpi", + "js-sys", + "libc", + "memmap2", + "ndk 0.9.0", + "objc2 0.5.2", + "objc2-app-kit", + "objc2-foundation", + "objc2-ui-kit", + "orbclient", + "percent-encoding", + "pin-project", + "raw-window-handle 0.6.2", + "redox_syscall 0.4.1", + "rustix", + "sctk-adwaita 0.10.1", + "smithay-client-toolkit 0.19.2", + "smol_str", + "tracing", + "unicode-segmentation", + "wasm-bindgen", + "wasm-bindgen-futures", + "wayland-backend", + "wayland-client", + "wayland-protocols 0.32.5", + "wayland-protocols-plasma 0.3.5", + "web-sys", + "web-time 1.1.0", + "windows-sys 0.52.0", + "x11-dl", + "x11rb", + "xkbcommon-dl", +] + [[package]] name = "winnow" -version = "0.6.24" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8d71a593cc5c42ad7876e2c1fda56f314f3754c084128833e64f1345ff8a03a" +checksum = "7e49d2d35d3fad69b39b94139037ecfb4f359f08958b9c11e7315ce770462419" dependencies = [ "memchr", ] diff --git a/Cargo.toml b/Cargo.toml index 6989934936..663eac96cf 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,7 +6,8 @@ members = [ # default members "benches", - "examples", + "examples/features", + "examples/standalone/*", "lock-analyzer", "naga-cli", "naga", @@ -24,7 +25,8 @@ members = [ exclude = [] default-members = [ "benches", - "examples", + "examples/features", + "examples/standalone/*", "lock-analyzer", "naga-cli", "naga", @@ -43,6 +45,7 @@ default-members = [ [workspace.lints.clippy] manual_c_str_literals = "allow" ref_as_ptr = "warn" +# NOTE: disallowed-types is configured in other file: clippy.toml [workspace.package] edition = "2021" @@ -54,31 +57,28 @@ repository = "https://github.com/gfx-rs/wgpu" version = "24.0.0" authors = ["gfx-rs developers"] -[workspace.dependencies.wgc] -package = "wgpu-core" -path = "./wgpu-core" -version = "24.0.0" - -[workspace.dependencies.wgt] -package = "wgpu-types" -path = "./wgpu-types" -version = "24.0.0" - -[workspace.dependencies.hal] -package = "wgpu-hal" -path = "./wgpu-hal" -version = "24.0.0" - -[workspace.dependencies.naga] -path = "./naga" -version = "24.0.0" - [workspace.dependencies] -anyhow = "1.0.95" +naga = { version = "24.0.0", path = "./naga" } +wgpu = { version = "24.0.0", path = "./wgpu", default-features = false, features = [ + "serde", + "wgsl", + "dx12", + "metal", + "static-dxc", +] } +wgpu-core = { version = "24.0.0", path = "./wgpu-core" } +wgpu-hal = { version = "24.0.0", path = "./wgpu-hal" } +wgpu-macros = { version = "24.0.0", path = "./wgpu-macros" } +wgpu-test = { version = "24.0.0", path = "./tests" } +wgpu-types = { version = "24.0.0", path = "./wgpu-types" } + +anyhow = { version = "1.0.95", default-features = false } +approx = "0.5" argh = "0.1.13" -arrayvec = "0.7" +arrayvec = { version = "0.7.6", default-features = false } bincode = "1" -bit-vec = "0.8" +bit-set = { 
version = "0.8", default-features = false } +bit-vec = { version = "0.8", default-features = false } bitflags = "2.7" bytemuck = { version = "1.21", features = ["derive", "min_const_generics"] } cfg_aliases = "0.2.1" @@ -94,23 +94,27 @@ flume = "0.11" futures-lite = "2" getrandom = "0.2" glam = "0.29" +hashbrown = { version = "0.14.5", default-features = false, features = [ + "ahash", + "inline-more", +] } heck = "0.5.0" image = { version = "0.24", default-features = false, features = ["png"] } -indexmap = "2" +indexmap = { version = "2.5.0", default-features = false } itertools = { version = "0.13.0" } ktx2 = "0.3" -libc = "0.2" +libc = { version = "0.2", default-features = false } libloading = "0.8" libtest-mimic = "0.8.1" log = "0.4" nanorand = { version = "0.7", default-features = false, features = ["wyrand"] } -# https://github.com/Razaekel/noise-rs/issues/335 (Updated dependencies) -noise = { version = "0.8", git = "https://github.com/Razaekel/noise-rs.git", rev = "c6942d4fb70af26db4441edcf41f90fa115333f2" } +noise = "0.9" nv-flip = "0.1" obj = "0.10" -once_cell = "1.20.2" +# NOTE: once_cell/std is *required* for some commonly-used features, selecting this per crate +once_cell = { version = "1.20.2", default-features = false } # Firefox has 3.4.0 vendored, so we allow that version in our dependencies -ordered-float = ">=3,<=4.6" +ordered-float = { version = ">=3,<=4.6", default-features = false } parking_lot = "0.12.1" pico-args = { version = "0.5.0", features = [ "eq-separator", @@ -120,31 +124,22 @@ pico-args = { version = "0.5.0", features = [ png = "0.17.16" pollster = "0.4" profiling = { version = "1", default-features = false } -raw-window-handle = "0.6" +raw-window-handle = { version = "0.6", default-features = false } rayon = "1" renderdoc-sys = "1.1.0" ron = "0.8" -# rustc-hash 2.0 is a completely different hasher with different performance characteristics -serde_json = "1.0.134" -rustc-hash = "1" +# NOTE: rustc-hash v2 is a completely different hasher with different performance characteristics +# see discussion here (including with some other alternatives): https://github.com/gfx-rs/wgpu/issues/6999 +# (using default-features = false to support no-std build, avoiding any extra features that may require std::collections) +rustc-hash = { version = "1", default-features = false } +serde_json = "1.0.138" serde = { version = "1", default-features = false } smallvec = "1" static_assertions = "1.1.0" -strum = { version = "0.26.0", features = ["derive"] } +strum = { version = "0.26.3", default-features = false, features = ["derive"] } trybuild = "1" tracy-client = "0.17" -thiserror = "2" -wgpu = { version = "24.0.0", path = "./wgpu", default-features = false, features = [ - "serde", - "wgsl", - "dx12", - "metal", - "static-dxc", -] } -wgpu-core = { version = "24.0.0", path = "./wgpu-core" } -wgpu-macros = { version = "24.0.0", path = "./wgpu-macros" } -wgpu-test = { version = "24.0.0", path = "./tests" } -wgpu-types = { version = "24.0.0", path = "./wgpu-types" } +thiserror = { version = "2", default-features = false } winit = { version = "0.29", features = ["android-native-activity"] } # Metal dependencies @@ -159,8 +154,7 @@ ash = "0.38.0" gpu-alloc = "0.6" gpu-descriptor = "0.3" -# DX dependencies -bit-set = "0.8" +# DX12 dependencies gpu-allocator = { version = "0.27", default-features = false } range-alloc = "0.1" mach-dxcompiler-rs = { version = "0.1.4", default-features = false } @@ -173,7 +167,7 @@ glutin = { version = "0.31", default-features = false } glutin-winit = { 
version = "0.4", default-features = false } glutin_wgl_sys = "0.6" -# DX and GLES dependencies +# DX12 and GLES dependencies windows = { version = "0.58", default-features = false } # wasm32 dependencies @@ -184,7 +178,7 @@ wasm-bindgen = "0.2.97" wasm-bindgen-futures = "0.4.45" wasm-bindgen-test = "0.3" web-sys = { version = "0.3.74", default-features = false } -web-time = "0.2.4" +web-time = "1.1.0" # deno dependencies deno_console = "0.179.0" @@ -199,19 +193,9 @@ termcolor = "1.4.1" # android dependencies ndk-sys = "0.5.0" -[patch."https://github.com/gfx-rs/naga"] - -[patch."https://github.com/zakarumych/gpu-descriptor"] -#gpu-descriptor = { path = "../gpu-descriptor/gpu-descriptor" } - -[patch."https://github.com/zakarumych/gpu-alloc"] -#gpu-alloc = { path = "../gpu-alloc/gpu-alloc" } - +# These overrides allow our examples to explicitly depend on release crates [patch.crates-io] -#glow = { path = "../glow" } -#web-sys = { path = "../wasm-bindgen/crates/web-sys" } -#js-sys = { path = "../wasm-bindgen/crates/js-sys" } -#wasm-bindgen = { path = "../wasm-bindgen" } +wgpu = { path = "./wgpu" } [profile.release] lto = "thin" diff --git a/README.md b/README.md index 55b1a415b2..2a4ce0593b 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,13 @@ The API is based on the [WebGPU standard](https://gpuweb.github.io/gpuweb/). It serves as the core of the WebGPU integration in Firefox, Servo, and Deno. +## Quick Links + +| Docs | Examples | Changelog | +|:------------------------------------:|:--------------------------------------------------------------------:|:----------------------------------------------------------------------------:| +| [v24](https://docs.rs/wgpu/) | [v24](https://github.com/gfx-rs/wgpu/tree/v24/examples#readme) | [v24](https://github.com/gfx-rs/wgpu/releases) | +| [`trunk`](https://wgpu.rs/doc/wgpu/) | [`trunk`](https://github.com/gfx-rs/wgpu/tree/trunk/examples#readme) | [`trunk`](https://github.com/gfx-rs/wgpu/blob/trunk/CHANGELOG.md#unreleased) | + ## Repo Overview The repository hosts the following libraries: @@ -42,14 +49,12 @@ Go to [https://wgpu.rs/examples/] to play with our examples in your browser. Req Rust examples can be found at [wgpu/examples](examples). You can run the examples on native with `cargo run --bin wgpu-examples `. See the [list of examples](examples). +If you are new to wgpu and graphics programming, we recommend starting with https://sotrh.github.io/learn-wgpu/. + To run the examples in a browser, run `cargo xtask run-wasm`. Then open `http://localhost:8000` in your browser, and you can choose an example to run. Naturally, in order to display any of the WebGPU based examples, you need to make sure your browser supports it. -If you are looking for a wgpu tutorial, look at the following: - -- https://sotrh.github.io/learn-wgpu/ - ### C/C++ To use wgpu in C/C++, you need [wgpu-native](https://github.com/gfx-rs/wgpu-native). diff --git a/benches/Cargo.toml b/benches/Cargo.toml index d00cecf62d..18c9ade4ed 100644 --- a/benches/Cargo.toml +++ b/benches/Cargo.toml @@ -8,6 +8,7 @@ homepage.workspace = true repository.workspace = true keywords.workspace = true license.workspace = true +rust-version.workspace = true autobenches = false publish = false diff --git a/clippy.toml b/clippy.toml new file mode 100644 index 0000000000..35f212da62 --- /dev/null +++ b/clippy.toml @@ -0,0 +1,8 @@ +# NOTE: Other global Clippy config is in top-level Cargo.toml. 
+ +disallowed-types = [ + { path = "std::collections::HashMap", reason = "use hashbrown::HashMap instead" }, + { path = "std::collections::HashSet", reason = "use hashbrown::HashSet instead" }, + { path = "rustc_hash::FxHashMap", reason = "use hashbrown::HashMap instead" }, + { path = "rustc_hash::FxHashSet", reason = "use hashbrown::HashSet instead" }, +] diff --git a/deno_webgpu/Cargo.toml b/deno_webgpu/Cargo.toml index d8776c0d40..9a55242d1b 100644 --- a/deno_webgpu/Cargo.toml +++ b/deno_webgpu/Cargo.toml @@ -16,16 +16,7 @@ path = "lib.rs" # We make all dependencies conditional on not being wasm, # so the whole workspace can built as wasm. [target.'cfg(not(target_arch = "wasm32"))'.dependencies] -deno_core.workspace = true -serde = { workspace = true, features = ["derive"] } -tokio = { workspace = true, features = ["full"] } -wgt = { workspace = true, package = "wgpu-types", features = ["serde"] } -raw-window-handle = { workspace = true } -thiserror.workspace = true - -[target.'cfg(not(target_arch = "wasm32"))'.dependencies.wgpu-core] -workspace = true -features = [ +wgpu-core = { workspace = true, features = [ "raw-window-handle", "trace", "replay", @@ -33,19 +24,30 @@ features = [ "strict_asserts", "wgsl", "gles", -] +] } +wgpu-types = { workspace = true, features = ["serde"] } + +deno_core.workspace = true +hashbrown = { workspace = true, features = ["serde"] } +raw-window-handle = { workspace = true } +serde = { workspace = true, features = ["derive"] } +thiserror.workspace = true +tokio = { workspace = true, features = ["full"] } -# We want the wgpu-core Metal backend on macOS and iOS. -[target.'cfg(target_vendor = "apple")'.dependencies.wgpu-core] -workspace = true -features = ["metal"] +# Apple Platforms +# +# We want the Metal backend. +[target.'cfg(target_vendor = "apple")'.dependencies] +wgpu-core = { workspace = true, features = ["metal"] } -# We want the wgpu-core Direct3D backend on Windows. -[target.'cfg(windows)'.dependencies.wgpu-core] -workspace = true -features = ["dx12"] +# Windows +# +# We want the DX12 backend. +[target.'cfg(windows)'.dependencies] +wgpu-core = { workspace = true, features = ["dx12"] } -# We want the wgpu-core Vulkan backend on Unix (but not Emscripten) and Windows. -[target.'cfg(any(windows, all(unix, not(target_os = "emscripten"))))'.dependencies.wgpu-core] -workspace = true -features = ["vulkan"] +# Windows and Unix (not Emscripten) +# +# We want the Vulkan backend. +[target.'cfg(any(windows, all(unix, not(target_os = "emscripten"))))'.dependencies] +wgpu-core = { workspace = true, features = ["vulkan"] } diff --git a/deno_webgpu/binding.rs b/deno_webgpu/binding.rs index f82e0656ca..c16171e2ba 100644 --- a/deno_webgpu/binding.rs +++ b/deno_webgpu/binding.rs @@ -1,7 +1,6 @@ // Copyright 2018-2024 the Deno authors. All rights reserved. MIT license. use super::error::WebGpuResult; -use super::wgpu_types; use deno_core::error::AnyError; use deno_core::op2; use deno_core::OpState; diff --git a/deno_webgpu/buffer.rs b/deno_webgpu/buffer.rs index 0a422f8f6e..978d31368e 100644 --- a/deno_webgpu/buffer.rs +++ b/deno_webgpu/buffer.rs @@ -1,7 +1,6 @@ // Copyright 2018-2024 the Deno authors. All rights reserved. MIT license. 
use super::error::WebGpuResult; -use super::wgpu_types; use deno_core::futures::channel::oneshot; use deno_core::op2; use deno_core::OpState; diff --git a/deno_webgpu/bundle.rs b/deno_webgpu/bundle.rs index d612a6d6ce..48bfbb18d3 100644 --- a/deno_webgpu/bundle.rs +++ b/deno_webgpu/bundle.rs @@ -1,6 +1,5 @@ // Copyright 2018-2024 the Deno authors. All rights reserved. MIT license. -use super::wgpu_types; use deno_core::op2; use deno_core::OpState; use deno_core::Resource; diff --git a/deno_webgpu/command_encoder.rs b/deno_webgpu/command_encoder.rs index 3eceec8ce0..02adeb0b15 100644 --- a/deno_webgpu/command_encoder.rs +++ b/deno_webgpu/command_encoder.rs @@ -1,6 +1,5 @@ // Copyright 2018-2024 the Deno authors. All rights reserved. MIT license. -use super::wgpu_types; use crate::WebGpuQuerySet; use deno_core::error::AnyError; use deno_core::op2; @@ -269,7 +268,7 @@ pub fn op_webgpu_command_encoder_begin_compute_pass( let command_encoder = &command_encoder_resource.1; let descriptor = wgpu_core::command::ComputePassDescriptor { label: Some(label), - timestamp_writes: timestamp_writes.as_ref(), + timestamp_writes, }; let (compute_pass, error) = diff --git a/deno_webgpu/lib.rs b/deno_webgpu/lib.rs index 56c475cd8f..d4c201bb09 100644 --- a/deno_webgpu/lib.rs +++ b/deno_webgpu/lib.rs @@ -6,14 +6,12 @@ use deno_core::op2; use deno_core::OpState; use deno_core::Resource; use deno_core::ResourceId; +use hashbrown::HashSet; use serde::Deserialize; use serde::Serialize; use std::borrow::Cow; use std::cell::RefCell; -use std::collections::HashSet; use std::rc::Rc; -pub use wgpu_core; -pub use wgt as wgpu_types; use error::WebGpuResult; @@ -395,9 +393,7 @@ pub fn op_webgpu_request_adapter( dx12: wgpu_types::Dx12BackendOptions { shader_compiler: wgpu_types::Dx12Compiler::Fxc, }, - gl: wgpu_types::GlBackendOptions { - gles_minor_version: wgpu_types::Gles3MinorVersion::default(), - }, + gl: wgpu_types::GlBackendOptions::default(), }, }, ))); @@ -662,13 +658,12 @@ pub fn op_webgpu_request_device( memory_hints: wgpu_types::MemoryHints::default(), }; + let webgpu_trace = std::env::var("DENO_WEBGPU_TRACE").unwrap(); + let res = instance.adapter_request_device( adapter, &descriptor, - std::env::var("DENO_WEBGPU_TRACE") - .ok() - .as_ref() - .map(std::path::Path::new), + Some(webgpu_trace.as_str()), None, None, ); diff --git a/deno_webgpu/pipeline.rs b/deno_webgpu/pipeline.rs index 910211e709..c8ba5f16ff 100644 --- a/deno_webgpu/pipeline.rs +++ b/deno_webgpu/pipeline.rs @@ -1,15 +1,14 @@ // Copyright 2018-2024 the Deno authors. All rights reserved. MIT license. -use super::wgpu_types; use deno_core::error::AnyError; use deno_core::op2; use deno_core::OpState; use deno_core::Resource; use deno_core::ResourceId; +use hashbrown::HashMap; use serde::Deserialize; use serde::Serialize; use std::borrow::Cow; -use std::collections::HashMap; use std::rc::Rc; use super::error::WebGpuError; diff --git a/deno_webgpu/queue.rs b/deno_webgpu/queue.rs index 808185f17e..a2e7d6a500 100644 --- a/deno_webgpu/queue.rs +++ b/deno_webgpu/queue.rs @@ -1,6 +1,5 @@ // Copyright 2018-2024 the Deno authors. All rights reserved. MIT license. -use super::wgpu_types; use crate::command_encoder::WebGpuCommandBuffer; use crate::Instance; use deno_core::error::AnyError; diff --git a/deno_webgpu/render_pass.rs b/deno_webgpu/render_pass.rs index 79d798ab52..0be8a1645e 100644 --- a/deno_webgpu/render_pass.rs +++ b/deno_webgpu/render_pass.rs @@ -1,6 +1,5 @@ // Copyright 2018-2024 the Deno authors. All rights reserved. MIT license. 
-use super::wgpu_types; use deno_core::op2; use deno_core::OpState; use deno_core::Resource; diff --git a/deno_webgpu/sampler.rs b/deno_webgpu/sampler.rs index 31b8ce2a24..df2b7d7131 100644 --- a/deno_webgpu/sampler.rs +++ b/deno_webgpu/sampler.rs @@ -1,6 +1,5 @@ // Copyright 2018-2024 the Deno authors. All rights reserved. MIT license. -use super::wgpu_types; use deno_core::op2; use deno_core::OpState; use deno_core::Resource; diff --git a/deno_webgpu/shader.rs b/deno_webgpu/shader.rs index f09d89c63d..7a31b805e3 100644 --- a/deno_webgpu/shader.rs +++ b/deno_webgpu/shader.rs @@ -1,6 +1,5 @@ // Copyright 2018-2024 the Deno authors. All rights reserved. MIT license. -use super::wgpu_types; use deno_core::op2; use deno_core::OpState; use deno_core::Resource; diff --git a/deno_webgpu/surface.rs b/deno_webgpu/surface.rs index c90d5e3aca..9f30897637 100644 --- a/deno_webgpu/surface.rs +++ b/deno_webgpu/surface.rs @@ -1,6 +1,5 @@ // Copyright 2018-2024 the Deno authors. All rights reserved. MIT license. -use super::wgpu_types; use super::WebGpuResult; use deno_core::op2; use deno_core::OpState; diff --git a/deno_webgpu/texture.rs b/deno_webgpu/texture.rs index 2ad2814996..decd521565 100644 --- a/deno_webgpu/texture.rs +++ b/deno_webgpu/texture.rs @@ -1,6 +1,5 @@ // Copyright 2018-2024 the Deno authors. All rights reserved. MIT license. -use super::wgpu_types; use deno_core::op2; use deno_core::OpState; use deno_core::Resource; diff --git a/etc/specs/ray_tracing.md b/etc/specs/ray_tracing.md index f23b5305a2..64908c9c5f 100644 --- a/etc/specs/ray_tracing.md +++ b/etc/specs/ray_tracing.md @@ -109,11 +109,11 @@ struct RayIntersection { kind: u32, // Distance from starting point, measured in units of `RayDesc::dir`. t: f32, - // Corresponds to `instance.custom_index` where `instance` is the `TlasInstance` + // Corresponds to `instance.custom_data` where `instance` is the `TlasInstance` // that the intersected object was contained in. - instance_custom_index: u32, + instance_custom_data: u32, // The index into the `TlasPackage` to get the `TlasInstance` that the hit object is in - instance_id: u32, + instance_index: u32, // The offset into the shader binding table. Currently, this value is always 0. sbt_record_offset: u32, // The index into the `Blas`'s build descriptor (e.g. if `BlasBuildEntry::geometry` is diff --git a/examples/README.md b/examples/README.md index 799c8a8d5b..20bd282389 100644 --- a/examples/README.md +++ b/examples/README.md @@ -1,20 +1,31 @@ -## Structure +> [!NOTE] +> These are the examples for the development version of wgpu. If you want to see the examples for the latest crates.io release +> of wgpu, go to the [latest release branch](https://github.com/gfx-rs/wgpu/tree/v24/examples#readme). -For the simplest examples without using any helping code (see `framework.rs` here), check out: +- `hello` for printing adapter information -- `hello_triangle` for graphics and presentation -- `hello_compute` for pure computing +If you are just starting your graphics programming journey, we recommend going through [Learn-WGPU](https://sotrh.github.io/learn-wgpu/) +for a more guided tutorial, which will also teach you the basics of graphics programming. -### Summary of examples -A summary of the basic examples as split along the graphics and compute "pathways" laid out roughly in order of building on each other.
Those further indented, and thus more roughly dependent on more other examples, tend to be more complicated as well as those further down. It should be noted, though, that computing examples, even though they are mentioned further down (because rendering to a window is by far the most common use case), tend to be less complex as they require less surrounding context to create and manage a window to render to. +All the standalone examples are separate crates and include all boilerplate inside the example itself. They can +be cloned out of the repository to serve as a starting point for your own projects and are fully commented. -The rest of the examples are for demonstrating specific features that you can come back for later when you know what those features are. +| Name | Description | Platforms | +|--------|-------------|-----------| +| [hello compute](standalone/01_hello_compute/) | The simplest example; shows how to run a compute shader on a given set of input data and get the results back. | Native-Only | +| [hello window](standalone/02_hello_window/) | Shows how to create a window and render into it. | Native-Only | -#### General +You can also use [`cargo-generate`](https://github.com/cargo-generate/cargo-generate) to easily use these as a basis for your own projects. -- `hello` - Demonstrates the basics of the WGPU library by getting a default Adapter and debugging it to the screen +```sh +cargo generate gfx-rs/wgpu --branch v24 +``` + +## Framework Examples + +These examples use a common framework to handle wgpu init, window creation, and event handling. This allows each example to focus on its unique code. Refer to the standalone examples for a more detailed look at the boilerplate code. #### Graphics @@ -44,69 +55,8 @@ The rest of the examples are for demonstrating specific features that you can co - `ray_cube_compute` - Demonstrates using ray queries with a compute shader.
- `ray_traced_triangle` - A simpler example demonstrating using ray queries with a compute shader -## Feature matrix - -| Feature | boids | bunnymark | conservative_raster | cube | hello_synchronization | hello_workgroups | mipmap | msaa_line | render_to_texture | repeated_compute | shadow | skybox | stencil_triangles | storage_texture | texture_arrays | uniform_values | water | ray_cube_compute | ray_cube_fragment | ray_scene | ray_shadows | ray_traced_triangle | -|------------------------------| ------ | --------- | ------------------- | ------ | --------------------- | ---------------- | ------ | --------- | ----------------- | ---------------- | ------ | ------ | ----------------- | --------------- | -------------- | -------------- | ------ |------------------|-------------------|-----------|-------------|---------------------| -| vertex attributes | :star: | | | :star: | | | | :star: | | | :star: | :star: | | | :star: | | :star: | | | | | | -| instancing | :star: | | | | | | | | | | | | | | | | | | | | | | -| lines and points | | | :star: | | | | | :star: | | | | | | | | | | | | | | | -| dynamic buffer offsets | | :star: | | | | | | | | | :star: | | | | | | | | | | | | -| implicit layout | | | | | | | :star: | | | | | | | | | | | | | | | | -| sampled color textures | :star: | :star: | :star: | :star: | | | :star: | | | | | :star: | | | :star: | | :star: | | | | | | -| storage textures | :star: | | | | | | | | | | | | | :star: | | | | :star: | | | | :star: | -| comparison samplers | | | | | | | | | | | :star: | | | | | | | | | | | | -| subresource views | | | | | | | :star: | | | | :star: | | | | | | | | | | | | -| cubemaps | | | | | | | | | | | | :star: | | | | | | | | | | | -| multisampling | | | | | | | | :star: | | | | | | | | | | | | | | | -| off-screen rendering | | | :star: | | | | | | :star: | | :star: | | | | | | :star: | | | | | | -| stencil testing | | | | | | | | | | | | | :star: | | | | | | | | | | -| depth testing | | | | | | | | | | | :star: | :star: | | | | | :star: | | | | | | -| depth biasing | | | | | | | | | | | :star: | | | | | | | | | | | | -| read-only depth | | | | | | | | | | | | | | | | | :star: | | | | | | -| blending | | :star: | | :star: | | | | | | | | | | | | | :star: | | | | | | -| render bundles | | | | | | | | :star: | | | | | | | | | :star: | | | | | | -| uniform buffers | | | | | | | | | | | | | | | | :star: | | | | | | | -| compute passes | :star: | | | | :star: | :star: | | | | :star: | | | | :star: | | | | | | | | | -| buffer mapping | | | | | :star: | :star: | | | | :star: | | | | :star: | | | | | | | | | -| error scopes | | | | :star: | | | | | | | | | | | | | | | | | | | -| compute workgroups | | | | | :star: | :star: | | | | | | | | | | | | | | | | | -| compute synchronization | | | | | :star: | | | | | | | | | | | | | | | | | | -| _optional extensions_ | | | | | | | | | | | | | | | :star: | | | | | | | | -| - SPIR-V shaders | | | | | | | | | | | | | | | | | | | | | | | -| - binding array | | | | | | | | | | | | | | | :star: | | | | | | | | -| - push constants | | | | | | | | | | | | | | | | | | | | | :star: | | -| - depth clamping | | | | | | | | | | | :star: | | | | | | | | | | | | -| - compressed textures | | | | | | | | | | | | :star: | | | | | | | | | | | -| - polygon mode | | | | :star: | | | | | | | | | | | | | | | | | | | -| - queries | | | | | | | :star: | | | | | | | | | | | | | | | | -| - conservative rasterization | | | :star: | | | | | | | | | | | | | | | | | | | | -| - ray queries | | | | | | | | | | | | | | | | | | :star: | 
:star: | :star: | :star: | :star: | -| _integrations_ | | | | | | | | | | | | | | | | | | | | | | | -| - staging belt | | | | | | | | | | | | :star: | | | | | | | | | | | -| - typed arena | | | | | | | | | | | | | | | | | | | | | | | -| - obj loading | | | | | | | | | | | | :star: | | | | | | | | :star: | | | - ## Running on the Web To run the examples in a browser, run `cargo xtask run-wasm`. Then open `http://localhost:8000` in your browser, and you can choose an example to run. Naturally, in order to display any of the WebGPU based examples, you need to make sure your browser supports it. - -Note that many cannot be downleveled to WebGL as WebGL does (among other things) not support storage texture, storage buffers and compute shaders. Running any example using these feature in a browser will require that browser to support WebGPU. - -## Additional notes - -Note that the examples regarding computing build off of each other; repeated_compute extends hello_compute, hello_workgroups assumes you know the basic workflow of GPU computation, and hello_synchronization assumes you know what a workgroup is. - -All the examples use [WGSL](https://gpuweb.github.io/gpuweb/wgsl.html) shaders unless specified otherwise. - -All framework-based examples render to the window and are reftested against the screenshot in the directory. - -## Hacking - -You can record an API trace for any of the framework-based examples by starting them as: - -```sh -mkdir -p trace && WGPU_TRACE=trace cargo run --features trace --bin wgpu-examples -``` diff --git a/examples/Cargo.toml b/examples/features/Cargo.toml similarity index 96% rename from examples/Cargo.toml rename to examples/features/Cargo.toml index 1bef728f3d..21416e7400 100644 --- a/examples/Cargo.toml +++ b/examples/features/Cargo.toml @@ -8,6 +8,7 @@ homepage.workspace = true repository.workspace = true keywords.workspace = true license.workspace = true +rust-version.workspace = true publish = false [package.metadata.cargo-machete] @@ -74,3 +75,6 @@ web-sys = { workspace = true, features = [ [target.'cfg(target_arch = "wasm32")'.dev-dependencies] wasm-bindgen-test.workspace = true + +[lints.clippy] +disallowed_types = "allow" diff --git a/examples/src/boids/README.md b/examples/features/src/boids/README.md similarity index 100% rename from examples/src/boids/README.md rename to examples/features/src/boids/README.md diff --git a/examples/src/boids/compute.wgsl b/examples/features/src/boids/compute.wgsl similarity index 100% rename from examples/src/boids/compute.wgsl rename to examples/features/src/boids/compute.wgsl diff --git a/examples/src/boids/draw.wgsl b/examples/features/src/boids/draw.wgsl similarity index 100% rename from examples/src/boids/draw.wgsl rename to examples/features/src/boids/draw.wgsl diff --git a/examples/src/boids/mod.rs b/examples/features/src/boids/mod.rs similarity index 99% rename from examples/src/boids/mod.rs rename to examples/features/src/boids/mod.rs index 6971a3ef18..bef7256617 100644 --- a/examples/src/boids/mod.rs +++ b/examples/features/src/boids/mod.rs @@ -327,7 +327,7 @@ pub fn main() { static TEST: crate::framework::ExampleTestParams = crate::framework::ExampleTestParams { name: "boids", // Generated on 1080ti on Vk/Windows - image_path: "/examples/src/boids/screenshot.png", + image_path: "/examples/features/src/boids/screenshot.png", width: 1024, height: 768, optional_features: wgpu::Features::default(), diff --git a/examples/src/boids/screenshot.png b/examples/features/src/boids/screenshot.png similarity index 100% 
rename from examples/src/boids/screenshot.png rename to examples/features/src/boids/screenshot.png diff --git a/examples/src/bunnymark/README.md b/examples/features/src/bunnymark/README.md similarity index 100% rename from examples/src/bunnymark/README.md rename to examples/features/src/bunnymark/README.md diff --git a/examples/src/bunnymark/mod.rs b/examples/features/src/bunnymark/mod.rs similarity index 98% rename from examples/src/bunnymark/mod.rs rename to examples/features/src/bunnymark/mod.rs index 9158d17c11..8ea7b83d43 100644 --- a/examples/src/bunnymark/mod.rs +++ b/examples/features/src/bunnymark/mod.rs @@ -151,7 +151,7 @@ impl crate::framework::Example for Example { let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor { label: None, source: wgpu::ShaderSource::Wgsl(Cow::Borrowed(include_str!( - "../../../wgpu-hal/examples/halmark/shader.wgsl" + "../../../../wgpu-hal/examples/halmark/shader.wgsl" ))), }); @@ -238,7 +238,7 @@ impl crate::framework::Example for Example { }); let texture = { - let img_data = include_bytes!("../../../logo.png"); + let img_data = include_bytes!("../../../../logo.png"); let decoder = png::Decoder::new(std::io::Cursor::new(img_data)); let mut reader = decoder.read_info().unwrap(); let mut buf = vec![0; reader.output_buffer_size()]; @@ -440,7 +440,7 @@ pub fn main() { #[wgpu_test::gpu_test] static TEST: crate::framework::ExampleTestParams = crate::framework::ExampleTestParams { name: "bunnymark", - image_path: "/examples/src/bunnymark/screenshot.png", + image_path: "/examples/features/src/bunnymark/screenshot.png", width: 1024, height: 768, optional_features: wgpu::Features::default(), diff --git a/examples/src/bunnymark/screenshot.png b/examples/features/src/bunnymark/screenshot.png similarity index 100% rename from examples/src/bunnymark/screenshot.png rename to examples/features/src/bunnymark/screenshot.png diff --git a/examples/src/conservative_raster/README.md b/examples/features/src/conservative_raster/README.md similarity index 100% rename from examples/src/conservative_raster/README.md rename to examples/features/src/conservative_raster/README.md diff --git a/examples/src/conservative_raster/mod.rs b/examples/features/src/conservative_raster/mod.rs similarity index 99% rename from examples/src/conservative_raster/mod.rs rename to examples/features/src/conservative_raster/mod.rs index ca0e9b7110..1db6e09835 100644 --- a/examples/src/conservative_raster/mod.rs +++ b/examples/features/src/conservative_raster/mod.rs @@ -317,7 +317,7 @@ pub fn main() { #[wgpu_test::gpu_test] static TEST: crate::framework::ExampleTestParams = crate::framework::ExampleTestParams { name: "conservative-raster", - image_path: "/examples/src/conservative_raster/screenshot.png", + image_path: "/examples/features/src/conservative_raster/screenshot.png", width: 1024, height: 768, optional_features: wgpu::Features::default(), diff --git a/examples/src/conservative_raster/screenshot.png b/examples/features/src/conservative_raster/screenshot.png similarity index 100% rename from examples/src/conservative_raster/screenshot.png rename to examples/features/src/conservative_raster/screenshot.png diff --git a/examples/src/conservative_raster/triangle_and_lines.wgsl b/examples/features/src/conservative_raster/triangle_and_lines.wgsl similarity index 100% rename from examples/src/conservative_raster/triangle_and_lines.wgsl rename to examples/features/src/conservative_raster/triangle_and_lines.wgsl diff --git a/examples/src/conservative_raster/upscale.wgsl 
b/examples/features/src/conservative_raster/upscale.wgsl similarity index 100% rename from examples/src/conservative_raster/upscale.wgsl rename to examples/features/src/conservative_raster/upscale.wgsl diff --git a/examples/src/cube/README.md b/examples/features/src/cube/README.md similarity index 100% rename from examples/src/cube/README.md rename to examples/features/src/cube/README.md diff --git a/examples/src/cube/mod.rs b/examples/features/src/cube/mod.rs similarity index 99% rename from examples/src/cube/mod.rs rename to examples/features/src/cube/mod.rs index ddeeadc6be..8d0a28ab01 100644 --- a/examples/src/cube/mod.rs +++ b/examples/features/src/cube/mod.rs @@ -383,7 +383,7 @@ pub fn main() { static TEST: crate::framework::ExampleTestParams = crate::framework::ExampleTestParams { name: "cube", // Generated on 1080ti on Vk/Windows - image_path: "/examples/src/cube/screenshot.png", + image_path: "/examples/features/src/cube/screenshot.png", width: 1024, height: 768, optional_features: wgpu::Features::default(), @@ -399,7 +399,7 @@ static TEST: crate::framework::ExampleTestParams = crate::framework::ExampleTest static TEST_LINES: crate::framework::ExampleTestParams = crate::framework::ExampleTestParams { name: "cube-lines", // Generated on 1080ti on Vk/Windows - image_path: "/examples/src/cube/screenshot-lines.png", + image_path: "/examples/features/src/cube/screenshot-lines.png", width: 1024, height: 768, optional_features: wgpu::Features::POLYGON_MODE_LINE, diff --git a/examples/src/cube/screenshot-lines.png b/examples/features/src/cube/screenshot-lines.png similarity index 100% rename from examples/src/cube/screenshot-lines.png rename to examples/features/src/cube/screenshot-lines.png diff --git a/examples/src/cube/screenshot.png b/examples/features/src/cube/screenshot.png similarity index 100% rename from examples/src/cube/screenshot.png rename to examples/features/src/cube/screenshot.png diff --git a/examples/src/cube/shader.wgsl b/examples/features/src/cube/shader.wgsl similarity index 100% rename from examples/src/cube/shader.wgsl rename to examples/features/src/cube/shader.wgsl diff --git a/examples/src/framework.rs b/examples/features/src/framework.rs similarity index 99% rename from examples/src/framework.rs rename to examples/features/src/framework.rs index db863526d7..927a5fe122 100644 --- a/examples/src/framework.rs +++ b/examples/features/src/framework.rs @@ -597,7 +597,7 @@ impl From> let bytes = dst_buffer_slice.get_mapped_range().to_vec(); wgpu_test::image::compare_image_output( - dbg!(env!("CARGO_MANIFEST_DIR").to_string() + "/../" + params.image_path), + dbg!(env!("CARGO_MANIFEST_DIR").to_string() + "/../../" + params.image_path), &ctx.adapter_info, params.width, params.height, diff --git a/examples/src/hello_synchronization/README.md b/examples/features/src/hello_synchronization/README.md similarity index 100% rename from examples/src/hello_synchronization/README.md rename to examples/features/src/hello_synchronization/README.md diff --git a/examples/src/hello_synchronization/mod.rs b/examples/features/src/hello_synchronization/mod.rs similarity index 100% rename from examples/src/hello_synchronization/mod.rs rename to examples/features/src/hello_synchronization/mod.rs diff --git a/examples/src/hello_synchronization/shaders.wgsl b/examples/features/src/hello_synchronization/shaders.wgsl similarity index 100% rename from examples/src/hello_synchronization/shaders.wgsl rename to examples/features/src/hello_synchronization/shaders.wgsl diff --git 
a/examples/src/hello_synchronization/tests.rs b/examples/features/src/hello_synchronization/tests.rs similarity index 100% rename from examples/src/hello_synchronization/tests.rs rename to examples/features/src/hello_synchronization/tests.rs diff --git a/examples/src/hello_triangle/README.md b/examples/features/src/hello_triangle/README.md similarity index 100% rename from examples/src/hello_triangle/README.md rename to examples/features/src/hello_triangle/README.md diff --git a/examples/src/hello_triangle/mod.rs b/examples/features/src/hello_triangle/mod.rs similarity index 100% rename from examples/src/hello_triangle/mod.rs rename to examples/features/src/hello_triangle/mod.rs diff --git a/examples/src/hello_triangle/screenshot.png b/examples/features/src/hello_triangle/screenshot.png similarity index 100% rename from examples/src/hello_triangle/screenshot.png rename to examples/features/src/hello_triangle/screenshot.png diff --git a/examples/src/hello_triangle/shader.wgsl b/examples/features/src/hello_triangle/shader.wgsl similarity index 100% rename from examples/src/hello_triangle/shader.wgsl rename to examples/features/src/hello_triangle/shader.wgsl diff --git a/examples/src/hello_windows/README.md b/examples/features/src/hello_windows/README.md similarity index 100% rename from examples/src/hello_windows/README.md rename to examples/features/src/hello_windows/README.md diff --git a/examples/src/hello_windows/mod.rs b/examples/features/src/hello_windows/mod.rs similarity index 100% rename from examples/src/hello_windows/mod.rs rename to examples/features/src/hello_windows/mod.rs diff --git a/examples/src/hello_windows/screenshot.png b/examples/features/src/hello_windows/screenshot.png similarity index 100% rename from examples/src/hello_windows/screenshot.png rename to examples/features/src/hello_windows/screenshot.png diff --git a/examples/src/hello_workgroups/README.md b/examples/features/src/hello_workgroups/README.md similarity index 100% rename from examples/src/hello_workgroups/README.md rename to examples/features/src/hello_workgroups/README.md diff --git a/examples/src/hello_workgroups/mod.rs b/examples/features/src/hello_workgroups/mod.rs similarity index 100% rename from examples/src/hello_workgroups/mod.rs rename to examples/features/src/hello_workgroups/mod.rs diff --git a/examples/src/hello_workgroups/shader.wgsl b/examples/features/src/hello_workgroups/shader.wgsl similarity index 100% rename from examples/src/hello_workgroups/shader.wgsl rename to examples/features/src/hello_workgroups/shader.wgsl diff --git a/examples/src/lib.rs b/examples/features/src/lib.rs similarity index 95% rename from examples/src/lib.rs rename to examples/features/src/lib.rs index bd714e642e..f56f19c62f 100644 --- a/examples/src/lib.rs +++ b/examples/features/src/lib.rs @@ -8,8 +8,6 @@ pub mod boids; pub mod bunnymark; pub mod conservative_raster; pub mod cube; -pub mod hello; -pub mod hello_compute; pub mod hello_synchronization; pub mod hello_triangle; pub mod hello_windows; diff --git a/examples/src/main.rs b/examples/features/src/main.rs similarity index 95% rename from examples/src/main.rs rename to examples/features/src/main.rs index 463a8bb4f4..d803ba249d 100644 --- a/examples/src/main.rs +++ b/examples/features/src/main.rs @@ -32,18 +32,6 @@ const EXAMPLES: &[ExampleDesc] = &[ webgl: true, webgpu: true, }, - ExampleDesc { - name: "hello", - function: wgpu_examples::hello::main, - webgl: false, // No canvas for WebGL - webgpu: true, - }, - ExampleDesc { - name: "hello_compute", - 
function: wgpu_examples::hello_compute::main, - webgl: false, // No compute - webgpu: true, - }, ExampleDesc { name: "hello_synchronization", function: wgpu_examples::hello_synchronization::main, diff --git a/examples/src/mipmap/README.md b/examples/features/src/mipmap/README.md similarity index 100% rename from examples/src/mipmap/README.md rename to examples/features/src/mipmap/README.md diff --git a/examples/src/mipmap/blit.wgsl b/examples/features/src/mipmap/blit.wgsl similarity index 100% rename from examples/src/mipmap/blit.wgsl rename to examples/features/src/mipmap/blit.wgsl diff --git a/examples/src/mipmap/draw.wgsl b/examples/features/src/mipmap/draw.wgsl similarity index 100% rename from examples/src/mipmap/draw.wgsl rename to examples/features/src/mipmap/draw.wgsl diff --git a/examples/src/mipmap/mod.rs b/examples/features/src/mipmap/mod.rs similarity index 99% rename from examples/src/mipmap/mod.rs rename to examples/features/src/mipmap/mod.rs index d4d510eb06..8d50fc27a6 100644 --- a/examples/src/mipmap/mod.rs +++ b/examples/features/src/mipmap/mod.rs @@ -508,7 +508,7 @@ pub fn main() { #[wgpu_test::gpu_test] static TEST: crate::framework::ExampleTestParams = crate::framework::ExampleTestParams { name: "mipmap", - image_path: "/examples/src/mipmap/screenshot.png", + image_path: "/examples/features/src/mipmap/screenshot.png", width: 1024, height: 768, optional_features: wgpu::Features::default(), @@ -521,7 +521,7 @@ static TEST: crate::framework::ExampleTestParams = crate::framework::ExampleTest #[wgpu_test::gpu_test] static TEST_QUERY: crate::framework::ExampleTestParams = crate::framework::ExampleTestParams { name: "mipmap-query", - image_path: "/examples/src/mipmap/screenshot_query.png", + image_path: "/examples/features/src/mipmap/screenshot_query.png", width: 1024, height: 768, optional_features: QUERY_FEATURES, diff --git a/examples/src/mipmap/screenshot-query.png b/examples/features/src/mipmap/screenshot-query.png similarity index 100% rename from examples/src/mipmap/screenshot-query.png rename to examples/features/src/mipmap/screenshot-query.png diff --git a/examples/src/mipmap/screenshot.png b/examples/features/src/mipmap/screenshot.png similarity index 100% rename from examples/src/mipmap/screenshot.png rename to examples/features/src/mipmap/screenshot.png diff --git a/examples/src/mipmap/screenshot_query.png b/examples/features/src/mipmap/screenshot_query.png similarity index 100% rename from examples/src/mipmap/screenshot_query.png rename to examples/features/src/mipmap/screenshot_query.png diff --git a/examples/src/msaa_line/README.md b/examples/features/src/msaa_line/README.md similarity index 100% rename from examples/src/msaa_line/README.md rename to examples/features/src/msaa_line/README.md diff --git a/examples/src/msaa_line/mod.rs b/examples/features/src/msaa_line/mod.rs similarity index 99% rename from examples/src/msaa_line/mod.rs rename to examples/features/src/msaa_line/mod.rs index be9dc74f7a..f956235cc7 100644 --- a/examples/src/msaa_line/mod.rs +++ b/examples/features/src/msaa_line/mod.rs @@ -321,7 +321,7 @@ pub fn main() { #[wgpu_test::gpu_test] static TEST: crate::framework::ExampleTestParams = crate::framework::ExampleTestParams { name: "msaa-line", - image_path: "/examples/src/msaa_line/screenshot.png", + image_path: "/examples/features/src/msaa_line/screenshot.png", width: 1024, height: 768, optional_features: wgpu::Features::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES, diff --git a/examples/src/msaa_line/screenshot.png 
b/examples/features/src/msaa_line/screenshot.png similarity index 100% rename from examples/src/msaa_line/screenshot.png rename to examples/features/src/msaa_line/screenshot.png diff --git a/examples/src/msaa_line/shader.wgsl b/examples/features/src/msaa_line/shader.wgsl similarity index 100% rename from examples/src/msaa_line/shader.wgsl rename to examples/features/src/msaa_line/shader.wgsl diff --git a/examples/src/multiple_render_targets/README.md b/examples/features/src/multiple_render_targets/README.md similarity index 100% rename from examples/src/multiple_render_targets/README.md rename to examples/features/src/multiple_render_targets/README.md diff --git a/examples/src/multiple_render_targets/mod.rs b/examples/features/src/multiple_render_targets/mod.rs similarity index 99% rename from examples/src/multiple_render_targets/mod.rs rename to examples/features/src/multiple_render_targets/mod.rs index c7301024b5..37991354b4 100644 --- a/examples/src/multiple_render_targets/mod.rs +++ b/examples/features/src/multiple_render_targets/mod.rs @@ -534,7 +534,7 @@ pub fn main() { #[wgpu_test::gpu_test] static TEST: crate::framework::ExampleTestParams = crate::framework::ExampleTestParams { name: EXAMPLE_NAME, - image_path: "/examples/src/multiple_render_targets/screenshot.png", + image_path: "/examples/features/src/multiple_render_targets/screenshot.png", width: 1024, height: 768, optional_features: wgpu::Features::default(), diff --git a/examples/src/multiple_render_targets/screenshot.png b/examples/features/src/multiple_render_targets/screenshot.png similarity index 100% rename from examples/src/multiple_render_targets/screenshot.png rename to examples/features/src/multiple_render_targets/screenshot.png diff --git a/examples/src/multiple_render_targets/shader.wgsl b/examples/features/src/multiple_render_targets/shader.wgsl similarity index 100% rename from examples/src/multiple_render_targets/shader.wgsl rename to examples/features/src/multiple_render_targets/shader.wgsl diff --git a/examples/src/ray_cube_compute/README.md b/examples/features/src/ray_cube_compute/README.md similarity index 100% rename from examples/src/ray_cube_compute/README.md rename to examples/features/src/ray_cube_compute/README.md diff --git a/examples/src/ray_cube_compute/blit.wgsl b/examples/features/src/ray_cube_compute/blit.wgsl similarity index 100% rename from examples/src/ray_cube_compute/blit.wgsl rename to examples/features/src/ray_cube_compute/blit.wgsl diff --git a/examples/src/ray_cube_compute/mod.rs b/examples/features/src/ray_cube_compute/mod.rs similarity index 99% rename from examples/src/ray_cube_compute/mod.rs rename to examples/features/src/ray_cube_compute/mod.rs index 743ca17650..ec7864c1fb 100644 --- a/examples/src/ray_cube_compute/mod.rs +++ b/examples/features/src/ray_cube_compute/mod.rs @@ -486,7 +486,7 @@ pub fn main() { #[wgpu_test::gpu_test] static TEST: crate::framework::ExampleTestParams = crate::framework::ExampleTestParams { name: "ray_cube_compute", - image_path: "/examples/src/ray_cube_compute/screenshot.png", + image_path: "/examples/features/src/ray_cube_compute/screenshot.png", width: 1024, height: 768, optional_features: wgpu::Features::default(), diff --git a/examples/src/ray_cube_compute/screenshot.png b/examples/features/src/ray_cube_compute/screenshot.png similarity index 100% rename from examples/src/ray_cube_compute/screenshot.png rename to examples/features/src/ray_cube_compute/screenshot.png diff --git a/examples/src/ray_cube_compute/shader.wgsl 
b/examples/features/src/ray_cube_compute/shader.wgsl similarity index 97% rename from examples/src/ray_cube_compute/shader.wgsl rename to examples/features/src/ray_cube_compute/shader.wgsl index 79ee7ad7e5..cba6e1f848 100644 --- a/examples/src/ray_cube_compute/shader.wgsl +++ b/examples/features/src/ray_cube_compute/shader.wgsl @@ -29,8 +29,8 @@ struct RayDesc { struct RayIntersection { kind: u32, t: f32, - instance_custom_index: u32, - instance_id: u32, + instance_custom_data: u32, + instance_index: u32, sbt_record_offset: u32, geometry_index: u32, primitive_index: u32, diff --git a/examples/src/ray_cube_fragment/README.md b/examples/features/src/ray_cube_fragment/README.md similarity index 100% rename from examples/src/ray_cube_fragment/README.md rename to examples/features/src/ray_cube_fragment/README.md diff --git a/examples/src/ray_cube_fragment/mod.rs b/examples/features/src/ray_cube_fragment/mod.rs similarity index 99% rename from examples/src/ray_cube_fragment/mod.rs rename to examples/features/src/ray_cube_fragment/mod.rs index 9ebf36fb32..05cfa41d33 100644 --- a/examples/src/ray_cube_fragment/mod.rs +++ b/examples/features/src/ray_cube_fragment/mod.rs @@ -373,7 +373,7 @@ pub fn main() { #[wgpu_test::gpu_test] static TEST: crate::framework::ExampleTestParams = crate::framework::ExampleTestParams { name: "ray_cube_fragment", - image_path: "/examples/src/ray_cube_fragment/screenshot.png", + image_path: "/examples/features/src/ray_cube_fragment/screenshot.png", width: 1024, height: 768, optional_features: wgpu::Features::default(), diff --git a/examples/src/ray_cube_fragment/screenshot.png b/examples/features/src/ray_cube_fragment/screenshot.png similarity index 100% rename from examples/src/ray_cube_fragment/screenshot.png rename to examples/features/src/ray_cube_fragment/screenshot.png diff --git a/examples/src/ray_cube_fragment/shader.wgsl b/examples/features/src/ray_cube_fragment/shader.wgsl similarity index 100% rename from examples/src/ray_cube_fragment/shader.wgsl rename to examples/features/src/ray_cube_fragment/shader.wgsl diff --git a/examples/src/ray_cube_normals/README.md b/examples/features/src/ray_cube_normals/README.md similarity index 100% rename from examples/src/ray_cube_normals/README.md rename to examples/features/src/ray_cube_normals/README.md diff --git a/examples/src/ray_cube_normals/blit.wgsl b/examples/features/src/ray_cube_normals/blit.wgsl similarity index 100% rename from examples/src/ray_cube_normals/blit.wgsl rename to examples/features/src/ray_cube_normals/blit.wgsl diff --git a/examples/src/ray_cube_normals/mod.rs b/examples/features/src/ray_cube_normals/mod.rs similarity index 100% rename from examples/src/ray_cube_normals/mod.rs rename to examples/features/src/ray_cube_normals/mod.rs diff --git a/examples/src/ray_cube_normals/screenshot.png b/examples/features/src/ray_cube_normals/screenshot.png similarity index 100% rename from examples/src/ray_cube_normals/screenshot.png rename to examples/features/src/ray_cube_normals/screenshot.png diff --git a/examples/src/ray_cube_normals/shader.wgsl b/examples/features/src/ray_cube_normals/shader.wgsl similarity index 100% rename from examples/src/ray_cube_normals/shader.wgsl rename to examples/features/src/ray_cube_normals/shader.wgsl diff --git a/examples/src/ray_scene/cube.mtl b/examples/features/src/ray_scene/cube.mtl similarity index 100% rename from examples/src/ray_scene/cube.mtl rename to examples/features/src/ray_scene/cube.mtl diff --git a/examples/src/ray_scene/cube.obj 
b/examples/features/src/ray_scene/cube.obj similarity index 100% rename from examples/src/ray_scene/cube.obj rename to examples/features/src/ray_scene/cube.obj diff --git a/examples/src/ray_scene/mod.rs b/examples/features/src/ray_scene/mod.rs similarity index 99% rename from examples/src/ray_scene/mod.rs rename to examples/features/src/ray_scene/mod.rs index 1d681b064e..7acf13a855 100644 --- a/examples/src/ray_scene/mod.rs +++ b/examples/features/src/ray_scene/mod.rs @@ -551,7 +551,7 @@ pub fn main() { #[wgpu_test::gpu_test] static TEST: crate::framework::ExampleTestParams = crate::framework::ExampleTestParams { name: "ray_scene", - image_path: "/examples/src/ray_scene/screenshot.png", + image_path: "/examples/features/src/ray_scene/screenshot.png", width: 1024, height: 768, optional_features: wgpu::Features::default(), diff --git a/examples/src/ray_scene/screenshot.png b/examples/features/src/ray_scene/screenshot.png similarity index 100% rename from examples/src/ray_scene/screenshot.png rename to examples/features/src/ray_scene/screenshot.png diff --git a/examples/src/ray_scene/shader.wgsl b/examples/features/src/ray_scene/shader.wgsl similarity index 95% rename from examples/src/ray_scene/shader.wgsl rename to examples/features/src/ray_scene/shader.wgsl index 4e16bd9453..496125ea5c 100644 --- a/examples/src/ray_scene/shader.wgsl +++ b/examples/features/src/ray_scene/shader.wgsl @@ -52,8 +52,8 @@ struct RayDesc { struct RayIntersection { kind: u32, t: f32, - instance_custom_index: u32, - instance_id: u32, + instance_custom_data: u32, + instance_index: u32, sbt_record_offset: u32, geometry_index: u32, primitive_index: u32, @@ -131,7 +131,7 @@ fn fs_main(vertex: VertexOutput) -> @location(0) vec4 { let intersection = rayQueryGetCommittedIntersection(&rq); if (intersection.kind != RAY_QUERY_INTERSECTION_NONE) { - let instance = instances[intersection.instance_custom_index]; + let instance = instances[intersection.instance_custom_data]; let geometry = geometries[intersection.geometry_index + instance.first_geometry]; let index_offset = geometry.first_index; @@ -155,7 +155,7 @@ fn fs_main(vertex: VertexOutput) -> @location(0) vec4 { color = vec4(material.albedo, 1.0); - if(intersection.instance_custom_index == 1u){ + if(intersection.instance_custom_data == 1u){ color = vec4(normal, 1.0); } } diff --git a/examples/src/ray_shadows/README.md b/examples/features/src/ray_shadows/README.md similarity index 100% rename from examples/src/ray_shadows/README.md rename to examples/features/src/ray_shadows/README.md diff --git a/examples/src/ray_shadows/mod.rs b/examples/features/src/ray_shadows/mod.rs similarity index 99% rename from examples/src/ray_shadows/mod.rs rename to examples/features/src/ray_shadows/mod.rs index 278a60d38a..9b0a3671d7 100644 --- a/examples/src/ray_shadows/mod.rs +++ b/examples/features/src/ray_shadows/mod.rs @@ -369,7 +369,7 @@ pub fn main() { #[wgpu_test::gpu_test] static TEST: crate::framework::ExampleTestParams = crate::framework::ExampleTestParams { name: "ray_cube_shadows", - image_path: "/examples/src/ray_shadows/screenshot.png", + image_path: "/examples/features/src/ray_shadows/screenshot.png", width: 1024, height: 768, optional_features: wgpu::Features::default(), diff --git a/examples/src/ray_shadows/screenshot.png b/examples/features/src/ray_shadows/screenshot.png similarity index 100% rename from examples/src/ray_shadows/screenshot.png rename to examples/features/src/ray_shadows/screenshot.png diff --git a/examples/src/ray_shadows/shader.wgsl 
b/examples/features/src/ray_shadows/shader.wgsl similarity index 100% rename from examples/src/ray_shadows/shader.wgsl rename to examples/features/src/ray_shadows/shader.wgsl diff --git a/examples/src/ray_traced_triangle/README.md b/examples/features/src/ray_traced_triangle/README.md similarity index 100% rename from examples/src/ray_traced_triangle/README.md rename to examples/features/src/ray_traced_triangle/README.md diff --git a/examples/src/ray_traced_triangle/blit.wgsl b/examples/features/src/ray_traced_triangle/blit.wgsl similarity index 100% rename from examples/src/ray_traced_triangle/blit.wgsl rename to examples/features/src/ray_traced_triangle/blit.wgsl diff --git a/examples/src/ray_traced_triangle/mod.rs b/examples/features/src/ray_traced_triangle/mod.rs similarity index 99% rename from examples/src/ray_traced_triangle/mod.rs rename to examples/features/src/ray_traced_triangle/mod.rs index 056ccd0c05..99a82dd8ea 100644 --- a/examples/src/ray_traced_triangle/mod.rs +++ b/examples/features/src/ray_traced_triangle/mod.rs @@ -427,7 +427,7 @@ pub fn main() { #[wgpu_test::gpu_test] static TEST: crate::framework::ExampleTestParams = crate::framework::ExampleTestParams { name: "ray_traced_triangle", - image_path: "/examples/src/ray_traced_triangle/screenshot.png", + image_path: "/examples/features/src/ray_traced_triangle/screenshot.png", width: 1024, height: 768, optional_features: wgpu::Features::default(), diff --git a/examples/src/ray_traced_triangle/screenshot.png b/examples/features/src/ray_traced_triangle/screenshot.png similarity index 100% rename from examples/src/ray_traced_triangle/screenshot.png rename to examples/features/src/ray_traced_triangle/screenshot.png diff --git a/examples/src/ray_traced_triangle/shader.wgsl b/examples/features/src/ray_traced_triangle/shader.wgsl similarity index 100% rename from examples/src/ray_traced_triangle/shader.wgsl rename to examples/features/src/ray_traced_triangle/shader.wgsl diff --git a/examples/src/render_to_texture/README.md b/examples/features/src/render_to_texture/README.md similarity index 100% rename from examples/src/render_to_texture/README.md rename to examples/features/src/render_to_texture/README.md diff --git a/examples/src/render_to_texture/mod.rs b/examples/features/src/render_to_texture/mod.rs similarity index 100% rename from examples/src/render_to_texture/mod.rs rename to examples/features/src/render_to_texture/mod.rs diff --git a/examples/src/render_to_texture/shader.wgsl b/examples/features/src/render_to_texture/shader.wgsl similarity index 100% rename from examples/src/render_to_texture/shader.wgsl rename to examples/features/src/render_to_texture/shader.wgsl diff --git a/examples/src/repeated_compute/README.md b/examples/features/src/repeated_compute/README.md similarity index 100% rename from examples/src/repeated_compute/README.md rename to examples/features/src/repeated_compute/README.md diff --git a/examples/src/repeated_compute/mod.rs b/examples/features/src/repeated_compute/mod.rs similarity index 100% rename from examples/src/repeated_compute/mod.rs rename to examples/features/src/repeated_compute/mod.rs diff --git a/examples/src/hello_compute/shader.wgsl b/examples/features/src/repeated_compute/shader.wgsl similarity index 100% rename from examples/src/hello_compute/shader.wgsl rename to examples/features/src/repeated_compute/shader.wgsl diff --git a/examples/src/shadow/README.md b/examples/features/src/shadow/README.md similarity index 100% rename from examples/src/shadow/README.md rename to 
examples/features/src/shadow/README.md diff --git a/examples/src/shadow/mod.rs b/examples/features/src/shadow/mod.rs similarity index 99% rename from examples/src/shadow/mod.rs rename to examples/features/src/shadow/mod.rs index 842f831210..7358b0b92c 100644 --- a/examples/src/shadow/mod.rs +++ b/examples/features/src/shadow/mod.rs @@ -844,7 +844,7 @@ pub fn main() { #[wgpu_test::gpu_test] static TEST: crate::framework::ExampleTestParams = crate::framework::ExampleTestParams { name: "shadow", - image_path: "/examples/src/shadow/screenshot.png", + image_path: "/examples/features/src/shadow/screenshot.png", width: 1024, height: 768, optional_features: wgpu::Features::default(), diff --git a/examples/src/shadow/screenshot.png b/examples/features/src/shadow/screenshot.png similarity index 100% rename from examples/src/shadow/screenshot.png rename to examples/features/src/shadow/screenshot.png diff --git a/examples/src/shadow/shader.wgsl b/examples/features/src/shadow/shader.wgsl similarity index 100% rename from examples/src/shadow/shader.wgsl rename to examples/features/src/shadow/shader.wgsl diff --git a/examples/src/skybox/README.md b/examples/features/src/skybox/README.md similarity index 100% rename from examples/src/skybox/README.md rename to examples/features/src/skybox/README.md diff --git a/examples/src/skybox/images/astc.ktx2 b/examples/features/src/skybox/images/astc.ktx2 similarity index 100% rename from examples/src/skybox/images/astc.ktx2 rename to examples/features/src/skybox/images/astc.ktx2 diff --git a/examples/src/skybox/images/bc7.ktx2 b/examples/features/src/skybox/images/bc7.ktx2 similarity index 100% rename from examples/src/skybox/images/bc7.ktx2 rename to examples/features/src/skybox/images/bc7.ktx2 diff --git a/examples/src/skybox/images/etc2.ktx2 b/examples/features/src/skybox/images/etc2.ktx2 similarity index 100% rename from examples/src/skybox/images/etc2.ktx2 rename to examples/features/src/skybox/images/etc2.ktx2 diff --git a/examples/src/skybox/images/generation.bash b/examples/features/src/skybox/images/generation.bash similarity index 100% rename from examples/src/skybox/images/generation.bash rename to examples/features/src/skybox/images/generation.bash diff --git a/examples/src/skybox/images/rgba8.ktx2 b/examples/features/src/skybox/images/rgba8.ktx2 similarity index 100% rename from examples/src/skybox/images/rgba8.ktx2 rename to examples/features/src/skybox/images/rgba8.ktx2 diff --git a/examples/src/skybox/mod.rs b/examples/features/src/skybox/mod.rs similarity index 98% rename from examples/src/skybox/mod.rs rename to examples/features/src/skybox/mod.rs index 16c6eb4b4b..d9ec1c6f0e 100644 --- a/examples/src/skybox/mod.rs +++ b/examples/features/src/skybox/mod.rs @@ -474,7 +474,7 @@ pub fn main() { #[wgpu_test::gpu_test] static TEST: crate::framework::ExampleTestParams = crate::framework::ExampleTestParams { name: "skybox", - image_path: "/examples/src/skybox/screenshot.png", + image_path: "/examples/features/src/skybox/screenshot.png", width: 1024, height: 768, optional_features: wgpu::Features::default(), @@ -489,7 +489,7 @@ static TEST: crate::framework::ExampleTestParams = crate::framework::ExampleTest #[wgpu_test::gpu_test] static TEST_BCN: crate::framework::ExampleTestParams = crate::framework::ExampleTestParams { name: "skybox-bc7", - image_path: "/examples/src/skybox/screenshot_bc7.png", + image_path: "/examples/features/src/skybox/screenshot_bc7.png", width: 1024, height: 768, optional_features: wgpu::Features::TEXTURE_COMPRESSION_BC, @@ -502,7 
+502,7 @@ static TEST_BCN: crate::framework::ExampleTestParams = crate::framework::Example #[wgpu_test::gpu_test] static TEST_ETC2: crate::framework::ExampleTestParams = crate::framework::ExampleTestParams { name: "skybox-etc2", - image_path: "/examples/src/skybox/screenshot_etc2.png", + image_path: "/examples/features/src/skybox/screenshot_etc2.png", width: 1024, height: 768, optional_features: wgpu::Features::TEXTURE_COMPRESSION_ETC2, @@ -515,7 +515,7 @@ static TEST_ETC2: crate::framework::ExampleTestParams = crate::framework::Exampl #[wgpu_test::gpu_test] static TEST_ASTC: crate::framework::ExampleTestParams = crate::framework::ExampleTestParams { name: "skybox-astc", - image_path: "/examples/src/skybox/screenshot_astc.png", + image_path: "/examples/features/src/skybox/screenshot_astc.png", width: 1024, height: 768, optional_features: wgpu::Features::TEXTURE_COMPRESSION_ASTC, diff --git a/examples/src/skybox/models/teslacyberv3.0.mtl b/examples/features/src/skybox/models/teslacyberv3.0.mtl similarity index 100% rename from examples/src/skybox/models/teslacyberv3.0.mtl rename to examples/features/src/skybox/models/teslacyberv3.0.mtl diff --git a/examples/src/skybox/models/teslacyberv3.0.obj b/examples/features/src/skybox/models/teslacyberv3.0.obj similarity index 100% rename from examples/src/skybox/models/teslacyberv3.0.obj rename to examples/features/src/skybox/models/teslacyberv3.0.obj diff --git a/examples/src/skybox/screenshot.png b/examples/features/src/skybox/screenshot.png similarity index 100% rename from examples/src/skybox/screenshot.png rename to examples/features/src/skybox/screenshot.png diff --git a/examples/src/skybox/screenshot_astc.png b/examples/features/src/skybox/screenshot_astc.png similarity index 100% rename from examples/src/skybox/screenshot_astc.png rename to examples/features/src/skybox/screenshot_astc.png diff --git a/examples/src/skybox/screenshot_bc7.png b/examples/features/src/skybox/screenshot_bc7.png similarity index 100% rename from examples/src/skybox/screenshot_bc7.png rename to examples/features/src/skybox/screenshot_bc7.png diff --git a/examples/src/skybox/screenshot_etc2.png b/examples/features/src/skybox/screenshot_etc2.png similarity index 100% rename from examples/src/skybox/screenshot_etc2.png rename to examples/features/src/skybox/screenshot_etc2.png diff --git a/examples/src/skybox/shader.wgsl b/examples/features/src/skybox/shader.wgsl similarity index 100% rename from examples/src/skybox/shader.wgsl rename to examples/features/src/skybox/shader.wgsl diff --git a/examples/src/srgb_blend/README.md b/examples/features/src/srgb_blend/README.md similarity index 100% rename from examples/src/srgb_blend/README.md rename to examples/features/src/srgb_blend/README.md diff --git a/examples/src/srgb_blend/mod.rs b/examples/features/src/srgb_blend/mod.rs similarity index 98% rename from examples/src/srgb_blend/mod.rs rename to examples/features/src/srgb_blend/mod.rs index d56cea7bce..8471b3cc5c 100644 --- a/examples/src/srgb_blend/mod.rs +++ b/examples/features/src/srgb_blend/mod.rs @@ -225,7 +225,7 @@ pub fn main() { static TEST_SRGB: crate::framework::ExampleTestParams = crate::framework::ExampleTestParams { name: "srgb-blend-srg", // Generated on WARP/Windows - image_path: "/examples/src/srgb_blend/screenshot-srgb.png", + image_path: "/examples/features/src/srgb_blend/screenshot-srgb.png", width: 192, height: 192, optional_features: wgpu::Features::default(), @@ -239,7 +239,7 @@ static TEST_SRGB: crate::framework::ExampleTestParams = 
crate::framework::Exampl static TEST_LINEAR: crate::framework::ExampleTestParams = crate::framework::ExampleTestParams { name: "srgb-blend-linear", // Generated on WARP/Windows - image_path: "/examples/src/srgb_blend/screenshot-linear.png", + image_path: "/examples/features/src/srgb_blend/screenshot-linear.png", width: 192, height: 192, optional_features: wgpu::Features::default(), diff --git a/examples/src/srgb_blend/screenshot-linear.png b/examples/features/src/srgb_blend/screenshot-linear.png similarity index 100% rename from examples/src/srgb_blend/screenshot-linear.png rename to examples/features/src/srgb_blend/screenshot-linear.png diff --git a/examples/src/srgb_blend/screenshot-srgb.png b/examples/features/src/srgb_blend/screenshot-srgb.png similarity index 100% rename from examples/src/srgb_blend/screenshot-srgb.png rename to examples/features/src/srgb_blend/screenshot-srgb.png diff --git a/examples/src/srgb_blend/shader.wgsl b/examples/features/src/srgb_blend/shader.wgsl similarity index 100% rename from examples/src/srgb_blend/shader.wgsl rename to examples/features/src/srgb_blend/shader.wgsl diff --git a/examples/src/stencil_triangles/README.md b/examples/features/src/stencil_triangles/README.md similarity index 100% rename from examples/src/stencil_triangles/README.md rename to examples/features/src/stencil_triangles/README.md diff --git a/examples/src/stencil_triangles/mod.rs b/examples/features/src/stencil_triangles/mod.rs similarity index 99% rename from examples/src/stencil_triangles/mod.rs rename to examples/features/src/stencil_triangles/mod.rs index 761a9ef602..b03aedd5ec 100644 --- a/examples/src/stencil_triangles/mod.rs +++ b/examples/features/src/stencil_triangles/mod.rs @@ -246,7 +246,7 @@ pub fn main() { #[wgpu_test::gpu_test] static TEST: crate::framework::ExampleTestParams = crate::framework::ExampleTestParams { name: "stencil-triangles", - image_path: "/examples/src/stencil_triangles/screenshot.png", + image_path: "/examples/features/src/stencil_triangles/screenshot.png", width: 1024, height: 768, optional_features: wgpu::Features::default(), diff --git a/examples/src/stencil_triangles/screenshot.png b/examples/features/src/stencil_triangles/screenshot.png similarity index 100% rename from examples/src/stencil_triangles/screenshot.png rename to examples/features/src/stencil_triangles/screenshot.png diff --git a/examples/src/stencil_triangles/shader.wgsl b/examples/features/src/stencil_triangles/shader.wgsl similarity index 100% rename from examples/src/stencil_triangles/shader.wgsl rename to examples/features/src/stencil_triangles/shader.wgsl diff --git a/examples/src/storage_texture/README.md b/examples/features/src/storage_texture/README.md similarity index 100% rename from examples/src/storage_texture/README.md rename to examples/features/src/storage_texture/README.md diff --git a/examples/src/storage_texture/example.png b/examples/features/src/storage_texture/example.png similarity index 100% rename from examples/src/storage_texture/example.png rename to examples/features/src/storage_texture/example.png diff --git a/examples/src/storage_texture/mod.rs b/examples/features/src/storage_texture/mod.rs similarity index 100% rename from examples/src/storage_texture/mod.rs rename to examples/features/src/storage_texture/mod.rs diff --git a/examples/src/storage_texture/shader.wgsl b/examples/features/src/storage_texture/shader.wgsl similarity index 100% rename from examples/src/storage_texture/shader.wgsl rename to examples/features/src/storage_texture/shader.wgsl 
diff --git a/examples/src/texture_arrays/README.md b/examples/features/src/texture_arrays/README.md similarity index 100% rename from examples/src/texture_arrays/README.md rename to examples/features/src/texture_arrays/README.md diff --git a/examples/src/texture_arrays/indexing.wgsl b/examples/features/src/texture_arrays/indexing.wgsl similarity index 98% rename from examples/src/texture_arrays/indexing.wgsl rename to examples/features/src/texture_arrays/indexing.wgsl index 0f952264c4..aad090c12e 100644 --- a/examples/src/texture_arrays/indexing.wgsl +++ b/examples/features/src/texture_arrays/indexing.wgsl @@ -35,7 +35,7 @@ struct Uniforms { index: u32, } -@group(0) @binding(3) +@group(1) @binding(0) var uniforms: Uniforms; @fragment diff --git a/examples/src/texture_arrays/mod.rs b/examples/features/src/texture_arrays/mod.rs similarity index 90% rename from examples/src/texture_arrays/mod.rs rename to examples/features/src/texture_arrays/mod.rs index 7cac05c384..a2bbd44130 100644 --- a/examples/src/texture_arrays/mod.rs +++ b/examples/features/src/texture_arrays/mod.rs @@ -64,6 +64,7 @@ fn create_texture_data(color: Color) -> [u8; 4] { struct Example { pipeline: wgpu::RenderPipeline, bind_group: wgpu::BindGroup, + uniform_bind_group: wgpu::BindGroup, vertex_buffer: wgpu::Buffer, index_buffer: wgpu::Buffer, index_format: wgpu::IndexFormat, @@ -261,8 +262,14 @@ impl crate::framework::Example for Example { ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering), count: NonZeroU32::new(2), }, - wgpu::BindGroupLayoutEntry { - binding: 3, + ], + }); + + let uniform_bind_group_layout = + device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label: Some("uniform bind group layout"), + entries: &[wgpu::BindGroupLayoutEntry { + binding: 0, visibility: wgpu::ShaderStages::FRAGMENT, ty: wgpu::BindingType::Buffer { ty: wgpu::BufferBindingType::Uniform, @@ -270,9 +277,8 @@ impl crate::framework::Example for Example { min_binding_size: Some(NonZeroU64::new(4).unwrap()), }, count: None, - }, - ], - }); + }], + }); let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor { entries: &[ @@ -294,22 +300,27 @@ impl crate::framework::Example for Example { binding: 2, resource: wgpu::BindingResource::SamplerArray(&[&sampler, &sampler]), }, - wgpu::BindGroupEntry { - binding: 3, - resource: wgpu::BindingResource::Buffer(wgpu::BufferBinding { - buffer: &texture_index_buffer, - offset: 0, - size: Some(NonZeroU64::new(4).unwrap()), - }), - }, ], layout: &bind_group_layout, label: Some("bind group"), }); + let uniform_bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor { + entries: &[wgpu::BindGroupEntry { + binding: 0, + resource: wgpu::BindingResource::Buffer(wgpu::BufferBinding { + buffer: &texture_index_buffer, + offset: 0, + size: Some(NonZeroU64::new(4).unwrap()), + }), + }], + layout: &uniform_bind_group_layout, + label: Some("uniform bind group"), + }); + let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { label: Some("main"), - bind_group_layouts: &[&bind_group_layout], + bind_group_layouts: &[&bind_group_layout, &uniform_bind_group_layout], push_constant_ranges: &[], }); @@ -347,6 +358,7 @@ impl crate::framework::Example for Example { Self { pipeline, bind_group, + uniform_bind_group, vertex_buffer, index_buffer, index_format, @@ -388,12 +400,14 @@ impl crate::framework::Example for Example { rpass.set_vertex_buffer(0, self.vertex_buffer.slice(..)); rpass.set_index_buffer(self.index_buffer.slice(..), self.index_format); 
if self.uniform_workaround { - rpass.set_bind_group(0, &self.bind_group, &[0]); + rpass.set_bind_group(0, &self.bind_group, &[]); + rpass.set_bind_group(1, &self.uniform_bind_group, &[0]); rpass.draw_indexed(0..6, 0, 0..1); - rpass.set_bind_group(0, &self.bind_group, &[256]); + rpass.set_bind_group(1, &self.uniform_bind_group, &[256]); rpass.draw_indexed(6..12, 0, 0..1); } else { - rpass.set_bind_group(0, &self.bind_group, &[0]); + rpass.set_bind_group(0, &self.bind_group, &[]); + rpass.set_bind_group(1, &self.uniform_bind_group, &[0]); rpass.draw_indexed(0..12, 0, 0..1); } @@ -411,7 +425,7 @@ pub fn main() { #[wgpu_test::gpu_test] static TEST: crate::framework::ExampleTestParams = crate::framework::ExampleTestParams { name: "texture-arrays", - image_path: "/examples/src/texture_arrays/screenshot.png", + image_path: "/examples/features/src/texture_arrays/screenshot.png", width: 1024, height: 768, optional_features: wgpu::Features::empty(), @@ -424,7 +438,7 @@ static TEST: crate::framework::ExampleTestParams = crate::framework::ExampleTest #[wgpu_test::gpu_test] static TEST_UNIFORM: crate::framework::ExampleTestParams = crate::framework::ExampleTestParams { name: "texture-arrays-uniform", - image_path: "/examples/src/texture_arrays/screenshot.png", + image_path: "/examples/features/src/texture_arrays/screenshot.png", width: 1024, height: 768, optional_features: wgpu::Features::empty(), @@ -438,7 +452,7 @@ static TEST_UNIFORM: crate::framework::ExampleTestParams = crate::framework::Exa static TEST_NON_UNIFORM: crate::framework::ExampleTestParams = crate::framework::ExampleTestParams { name: "texture-arrays-non-uniform", - image_path: "/examples/src/texture_arrays/screenshot.png", + image_path: "/examples/features/src/texture_arrays/screenshot.png", width: 1024, height: 768, optional_features: diff --git a/examples/src/texture_arrays/non_uniform_indexing.wgsl b/examples/features/src/texture_arrays/non_uniform_indexing.wgsl similarity index 100% rename from examples/src/texture_arrays/non_uniform_indexing.wgsl rename to examples/features/src/texture_arrays/non_uniform_indexing.wgsl diff --git a/examples/src/texture_arrays/screenshot.png b/examples/features/src/texture_arrays/screenshot.png similarity index 100% rename from examples/src/texture_arrays/screenshot.png rename to examples/features/src/texture_arrays/screenshot.png diff --git a/examples/src/timestamp_queries/README.md b/examples/features/src/timestamp_queries/README.md similarity index 100% rename from examples/src/timestamp_queries/README.md rename to examples/features/src/timestamp_queries/README.md diff --git a/examples/src/timestamp_queries/mod.rs b/examples/features/src/timestamp_queries/mod.rs similarity index 100% rename from examples/src/timestamp_queries/mod.rs rename to examples/features/src/timestamp_queries/mod.rs diff --git a/examples/src/timestamp_queries/shader.wgsl b/examples/features/src/timestamp_queries/shader.wgsl similarity index 100% rename from examples/src/timestamp_queries/shader.wgsl rename to examples/features/src/timestamp_queries/shader.wgsl diff --git a/examples/src/uniform_values/README.md b/examples/features/src/uniform_values/README.md similarity index 100% rename from examples/src/uniform_values/README.md rename to examples/features/src/uniform_values/README.md diff --git a/examples/src/uniform_values/mod.rs b/examples/features/src/uniform_values/mod.rs similarity index 100% rename from examples/src/uniform_values/mod.rs rename to examples/features/src/uniform_values/mod.rs diff --git 
a/examples/src/uniform_values/screenshot1.png b/examples/features/src/uniform_values/screenshot1.png similarity index 100% rename from examples/src/uniform_values/screenshot1.png rename to examples/features/src/uniform_values/screenshot1.png diff --git a/examples/src/uniform_values/screenshot2.png b/examples/features/src/uniform_values/screenshot2.png similarity index 100% rename from examples/src/uniform_values/screenshot2.png rename to examples/features/src/uniform_values/screenshot2.png diff --git a/examples/src/uniform_values/screenshot3.png b/examples/features/src/uniform_values/screenshot3.png similarity index 100% rename from examples/src/uniform_values/screenshot3.png rename to examples/features/src/uniform_values/screenshot3.png diff --git a/examples/src/uniform_values/shader.wgsl b/examples/features/src/uniform_values/shader.wgsl similarity index 100% rename from examples/src/uniform_values/shader.wgsl rename to examples/features/src/uniform_values/shader.wgsl diff --git a/examples/src/utils.rs b/examples/features/src/utils.rs similarity index 100% rename from examples/src/utils.rs rename to examples/features/src/utils.rs diff --git a/examples/src/water/README.md b/examples/features/src/water/README.md similarity index 100% rename from examples/src/water/README.md rename to examples/features/src/water/README.md diff --git a/examples/src/water/mod.rs b/examples/features/src/water/mod.rs similarity index 99% rename from examples/src/water/mod.rs rename to examples/features/src/water/mod.rs index b673b711bd..1e4b164020 100644 --- a/examples/src/water/mod.rs +++ b/examples/features/src/water/mod.rs @@ -822,7 +822,7 @@ pub fn main() { #[wgpu_test::gpu_test] static TEST: crate::framework::ExampleTestParams = crate::framework::ExampleTestParams { name: "water", - image_path: "/examples/src/water/screenshot.png", + image_path: "/examples/features/src/water/screenshot.png", width: 1024, height: 768, optional_features: wgpu::Features::default(), diff --git a/examples/src/water/point_gen.rs b/examples/features/src/water/point_gen.rs similarity index 100% rename from examples/src/water/point_gen.rs rename to examples/features/src/water/point_gen.rs diff --git a/examples/src/water/screenshot.png b/examples/features/src/water/screenshot.png similarity index 100% rename from examples/src/water/screenshot.png rename to examples/features/src/water/screenshot.png diff --git a/examples/src/water/terrain.wgsl b/examples/features/src/water/terrain.wgsl similarity index 100% rename from examples/src/water/terrain.wgsl rename to examples/features/src/water/terrain.wgsl diff --git a/examples/src/water/water.wgsl b/examples/features/src/water/water.wgsl similarity index 100% rename from examples/src/water/water.wgsl rename to examples/features/src/water/water.wgsl diff --git a/examples/static/index.html b/examples/features/web-static/index.html similarity index 100% rename from examples/static/index.html rename to examples/features/web-static/index.html diff --git a/examples/src/hello/README.md b/examples/src/hello/README.md deleted file mode 100644 index 1d51a6b83b..0000000000 --- a/examples/src/hello/README.md +++ /dev/null @@ -1,20 +0,0 @@ -# hello - -This example prints output describing the adapter in use. 
- -## To Run - -``` -cargo run --bin wgpu-examples hello -``` - -## Example output - -``` -# You might see different output as it depends on your graphics card and drivers -Available adapters: - AdapterInfo { name: "AMD RADV VEGA10", vendor: 4098, device: 26751, device_type: DiscreteGpu, backend: Vulkan } - AdapterInfo { name: "llvmpipe (LLVM 12.0.0, 256 bits)", vendor: 65541, device: 0, device_type: Cpu, backend: Vulkan } - AdapterInfo { name: "Radeon RX Vega (VEGA10, DRM 3.41.0, 5.13.0-52-generic, LLVM 12.0.0)", vendor: 4098, device: 0, device_type: Other, backend: Gl } -Selected adapter: AdapterInfo { name: "AMD RADV VEGA10", vendor: 4098, device: 26751, device_type: DiscreteGpu, backend: Vulkan } -``` diff --git a/examples/src/hello/mod.rs b/examples/src/hello/mod.rs deleted file mode 100644 index ba6ea2553f..0000000000 --- a/examples/src/hello/mod.rs +++ /dev/null @@ -1,36 +0,0 @@ -/// This example shows how to describe the adapter in use. -async fn run() { - let adapter = { - let instance = wgpu::Instance::default(); - #[cfg(not(target_arch = "wasm32"))] - { - log::info!("Available adapters:"); - for a in instance.enumerate_adapters(wgpu::Backends::all()) { - log::info!(" {:?}", a.get_info()) - } - } - instance - .request_adapter(&wgpu::RequestAdapterOptions::default()) - .await - .unwrap() - }; - - log::info!("Selected adapter: {:?}", adapter.get_info()) -} - -pub fn main() { - #[cfg(not(target_arch = "wasm32"))] - { - env_logger::builder() - .filter(Some(module_path!()), log::LevelFilter::Info) - .parse_default_env() - .init(); - pollster::block_on(run()); - } - #[cfg(target_arch = "wasm32")] - { - std::panic::set_hook(Box::new(console_error_panic_hook::hook)); - console_log::init().expect("could not initialize logger"); - wasm_bindgen_futures::spawn_local(run()); - } -} diff --git a/examples/src/hello_compute/README.md b/examples/src/hello_compute/README.md deleted file mode 100644 index 8b3f3e111d..0000000000 --- a/examples/src/hello_compute/README.md +++ /dev/null @@ -1,22 +0,0 @@ -# hello-compute - -Runs a compute shader to determine the number of iterations of the rules from -Collatz Conjecture - -- If n is even, n = n/2 -- If n is odd, n = 3n+1 - -that it will take to finish and reach the number `1`. 
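The Collatz rules above are simple enough to sanity-check on the CPU. Below is a minimal Rust sketch of the iteration count the removed compute examples calculate on the GPU, using the same `0xffffffff` overflow sentinel as the shader; for the inputs `1 4 3 295` it reproduces the example output quoted further down.

```rust
// Minimal CPU-side sketch of the iteration count described above; the removed
// compute examples implement the same loop in WGSL.
const OVERFLOW: u32 = 0xffff_ffff; // sentinel for an intermediate u32 overflow

fn collatz_iterations(mut n: u32) -> u32 {
    let mut i = 0;
    while n > 1 {
        if n % 2 == 0 {
            n /= 2;
        } else {
            // 3n + 1 would overflow a u32 once n reaches 0x55555555.
            if n >= 0x5555_5555 {
                return OVERFLOW;
            }
            n = 3 * n + 1;
        }
        i += 1;
    }
    i
}

fn main() {
    // Prints: Steps: [0, 2, 7, 55]
    println!("Steps: {:?}", [1u32, 4, 3, 295].map(collatz_iterations));
}
```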
- -## To Run - -``` -# Pass in any 4 numbers as arguments -RUST_LOG=hello_compute cargo run --bin wgpu-examples hello_compute 1 4 3 295 -``` - -## Example Output - -``` -[2020-04-25T11:15:33Z INFO hello_compute] Steps: [0, 2, 7, 55] -``` diff --git a/examples/src/hello_compute/mod.rs b/examples/src/hello_compute/mod.rs deleted file mode 100644 index 0f38a1c4b8..0000000000 --- a/examples/src/hello_compute/mod.rs +++ /dev/null @@ -1,193 +0,0 @@ -use std::str::FromStr; -use wgpu::util::DeviceExt; - -// Indicates a u32 overflow in an intermediate Collatz value -const OVERFLOW: u32 = 0xffffffff; - -async fn run() { - let numbers = if std::env::args().len() <= 2 { - let default = vec![1, 2, 3, 4]; - println!("No numbers were provided, defaulting to {default:?}"); - default - } else { - std::env::args() - .skip(2) - .map(|s| u32::from_str(&s).expect("You must pass a list of positive integers!")) - .collect() - }; - - let steps = execute_gpu(&numbers).await.unwrap(); - - let disp_steps: Vec = steps - .iter() - .map(|&n| match n { - OVERFLOW => "OVERFLOW".to_string(), - _ => n.to_string(), - }) - .collect(); - - println!("Steps: [{}]", disp_steps.join(", ")); - #[cfg(target_arch = "wasm32")] - log::info!("Steps: [{}]", disp_steps.join(", ")); -} - -async fn execute_gpu(numbers: &[u32]) -> Option> { - // Instantiates instance of WebGPU - let instance = wgpu::Instance::default(); - - // `request_adapter` instantiates the general connection to the GPU - let adapter = instance - .request_adapter(&wgpu::RequestAdapterOptions::default()) - .await?; - - // `request_device` instantiates the feature specific connection to the GPU, defining some parameters, - // `features` being the available features. - let (device, queue) = adapter - .request_device( - &wgpu::DeviceDescriptor { - label: None, - required_features: wgpu::Features::empty(), - required_limits: wgpu::Limits::downlevel_defaults(), - memory_hints: wgpu::MemoryHints::MemoryUsage, - }, - None, - ) - .await - .unwrap(); - - execute_gpu_inner(&device, &queue, numbers).await -} - -async fn execute_gpu_inner( - device: &wgpu::Device, - queue: &wgpu::Queue, - numbers: &[u32], -) -> Option> { - // Loads the shader from WGSL - let cs_module = device.create_shader_module(wgpu::include_wgsl!("shader.wgsl")); - - // Gets the size in bytes of the buffer. - let size = size_of_val(numbers) as wgpu::BufferAddress; - - // Instantiates buffer without data. - // `usage` of buffer specifies how it can be used: - // `BufferUsages::MAP_READ` allows it to be read (outside the shader). - // `BufferUsages::COPY_DST` allows it to be the destination of the copy. - let staging_buffer = device.create_buffer(&wgpu::BufferDescriptor { - label: None, - size, - usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST, - mapped_at_creation: false, - }); - - // Instantiates buffer with data (`numbers`). - // Usage allowing the buffer to be: - // A storage buffer (can be bound within a bind group and thus available to a shader). - // The destination of a copy. - // The source of a copy. - let storage_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor { - label: Some("Storage Buffer"), - contents: bytemuck::cast_slice(numbers), - usage: wgpu::BufferUsages::STORAGE - | wgpu::BufferUsages::COPY_DST - | wgpu::BufferUsages::COPY_SRC, - }); - - // A bind group defines how buffers are accessed by shaders. - // It is to WebGPU what a descriptor set is to Vulkan. 
- // `binding` here refers to the `binding` of a buffer in the shader (`layout(set = 0, binding = 0) buffer`). - - // A pipeline specifies the operation of a shader - - // Instantiates the pipeline. - let compute_pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor { - label: None, - layout: None, - module: &cs_module, - entry_point: Some("main"), - compilation_options: Default::default(), - cache: None, - }); - - // Instantiates the bind group, once again specifying the binding of buffers. - let bind_group_layout = compute_pipeline.get_bind_group_layout(0); - let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor { - label: None, - layout: &bind_group_layout, - entries: &[wgpu::BindGroupEntry { - binding: 0, - resource: storage_buffer.as_entire_binding(), - }], - }); - - // A command encoder executes one or many pipelines. - // It is to WebGPU what a command buffer is to Vulkan. - let mut encoder = - device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None }); - { - let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { - label: None, - timestamp_writes: None, - }); - cpass.set_pipeline(&compute_pipeline); - cpass.set_bind_group(0, &bind_group, &[]); - cpass.insert_debug_marker("compute collatz iterations"); - cpass.dispatch_workgroups(numbers.len() as u32, 1, 1); // Number of cells to run, the (x,y,z) size of item being processed - } - // Sets adds copy operation to command encoder. - // Will copy data from storage buffer on GPU to staging buffer on CPU. - encoder.copy_buffer_to_buffer(&storage_buffer, 0, &staging_buffer, 0, size); - - // Submits command encoder for processing - queue.submit(Some(encoder.finish())); - - // Note that we're not calling `.await` here. - let buffer_slice = staging_buffer.slice(..); - // Sets the buffer up for mapping, sending over the result of the mapping back to us when it is finished. - let (sender, receiver) = flume::bounded(1); - buffer_slice.map_async(wgpu::MapMode::Read, move |v| sender.send(v).unwrap()); - - // Poll the device in a blocking manner so that our future resolves. - // In an actual application, `device.poll(...)` should - // be called in an event loop or on another thread. - device.poll(wgpu::Maintain::wait()).panic_on_timeout(); - - // Awaits until `buffer_future` can be read from - if let Ok(Ok(())) = receiver.recv_async().await { - // Gets contents of buffer - let data = buffer_slice.get_mapped_range(); - // Since contents are got in bytes, this converts these bytes back to u32 - let result = bytemuck::cast_slice(&data).to_vec(); - - // With the current interface, we have to make sure all mapped views are - // dropped before we unmap the buffer. 
- drop(data); - staging_buffer.unmap(); // Unmaps buffer from memory - // If you are familiar with C++ these 2 lines can be thought of similarly to: - // delete myPointer; - // myPointer = NULL; - // It effectively frees the memory - - // Returns data from buffer - Some(result) - } else { - panic!("failed to run compute on gpu!") - } -} - -pub fn main() { - #[cfg(not(target_arch = "wasm32"))] - { - env_logger::init(); - pollster::block_on(run()); - } - #[cfg(target_arch = "wasm32")] - { - std::panic::set_hook(Box::new(console_error_panic_hook::hook)); - console_log::init().expect("could not initialize logger"); - wasm_bindgen_futures::spawn_local(run()); - } -} - -#[cfg(test)] -mod tests; diff --git a/examples/src/hello_compute/tests.rs b/examples/src/hello_compute/tests.rs deleted file mode 100644 index f4554d7de5..0000000000 --- a/examples/src/hello_compute/tests.rs +++ /dev/null @@ -1,106 +0,0 @@ -use super::*; -use wgpu_test::{gpu_test, FailureCase, GpuTestConfiguration, TestParameters}; - -#[gpu_test] -static COMPUTE_1: GpuTestConfiguration = GpuTestConfiguration::new() - .parameters( - TestParameters::default() - .downlevel_flags(wgpu::DownlevelFlags::COMPUTE_SHADERS) - .limits(wgpu::Limits::downlevel_defaults()) - .skip(FailureCase::adapter("V3D")), - ) - .run_async(|ctx| { - let input = &[1, 2, 3, 4]; - - async move { assert_execute_gpu(&ctx.device, &ctx.queue, input, &[0, 1, 7, 2]).await } - }); - -#[gpu_test] -static COMPUTE_2: GpuTestConfiguration = GpuTestConfiguration::new() - .parameters( - TestParameters::default() - .downlevel_flags(wgpu::DownlevelFlags::COMPUTE_SHADERS) - .limits(wgpu::Limits::downlevel_defaults()) - .skip(FailureCase::adapter("V3D")), - ) - .run_async(|ctx| { - let input = &[5, 23, 10, 9]; - - async move { assert_execute_gpu(&ctx.device, &ctx.queue, input, &[5, 15, 6, 19]).await } - }); - -#[gpu_test] -static COMPUTE_OVERFLOW: GpuTestConfiguration = GpuTestConfiguration::new() - .parameters( - TestParameters::default() - .downlevel_flags(wgpu::DownlevelFlags::COMPUTE_SHADERS) - .limits(wgpu::Limits::downlevel_defaults()) - .skip(FailureCase::adapter("V3D")), - ) - .run_async(|ctx| { - let input = &[77031, 837799, 8400511, 63728127]; - async move { - assert_execute_gpu( - &ctx.device, - &ctx.queue, - input, - &[350, 524, OVERFLOW, OVERFLOW], - ) - .await - } - }); - -#[cfg(not(target_arch = "wasm32"))] -#[gpu_test] -static MULTITHREADED_COMPUTE: GpuTestConfiguration = GpuTestConfiguration::new() - .parameters( - TestParameters::default() - .downlevel_flags(wgpu::DownlevelFlags::COMPUTE_SHADERS) - .limits(wgpu::Limits::downlevel_defaults()) - .skip(FailureCase::adapter("V3D")), - ) - .run_sync(|ctx| { - use std::{sync::mpsc, sync::Arc, thread, time::Duration}; - - let ctx = Arc::new(ctx); - - let thread_count = 8; - - let (tx, rx) = mpsc::channel(); - let workers: Vec<_> = (0..thread_count) - .map(move |_| { - let tx = tx.clone(); - let ctx = Arc::clone(&ctx); - thread::spawn(move || { - let input = &[100, 100, 100]; - pollster::block_on(assert_execute_gpu( - &ctx.device, - &ctx.queue, - input, - &[25, 25, 25], - )); - tx.send(true).unwrap(); - }) - }) - .collect(); - - for _ in 0..thread_count { - rx.recv_timeout(Duration::from_secs(10)) - .expect("A thread never completed."); - } - - for worker in workers { - worker.join().unwrap(); - } - }); - -async fn assert_execute_gpu( - device: &wgpu::Device, - queue: &wgpu::Queue, - input: &[u32], - expected: &[u32], -) { - if let Some(produced) = execute_gpu_inner(device, queue, input).await { - 
assert_eq!(produced, expected); - } -} diff --git a/examples/src/repeated_compute/shader.wgsl b/examples/src/repeated_compute/shader.wgsl deleted file mode 100644 index 41af4363a2..0000000000 --- a/examples/src/repeated_compute/shader.wgsl +++ /dev/null @@ -1,38 +0,0 @@ -@group(0) -@binding(0) -var v_indices: array; // this is used as both input and output for convenience - -// The Collatz Conjecture states that for any integer n: -// If n is even, n = n/2 -// If n is odd, n = 3n+1 -// And repeat this process for each new n, you will always eventually reach 1. -// Though the conjecture has not been proven, no counterexample has ever been found. -// This function returns how many times this recurrence needs to be applied to reach 1. -fn collatz_iterations(n_base: u32) -> u32{ - var n: u32 = n_base; - var i: u32 = 0u; - loop { - if (n <= 1u) { - break; - } - if (n % 2u == 0u) { - n = n / 2u; - } - else { - // Overflow? (i.e. 3*n + 1 > 0xffffffffu?) - if (n >= 1431655765u) { // 0x55555555u - return 4294967295u; // 0xffffffffu - } - - n = 3u * n + 1u; - } - i = i + 1u; - } - return i; -} - -@compute -@workgroup_size(1) -fn main(@builtin(global_invocation_id) global_id: vec3) { - v_indices[global_id.x] = collatz_iterations(v_indices[global_id.x]); -} diff --git a/examples/standalone/01_hello_compute/Cargo.toml b/examples/standalone/01_hello_compute/Cargo.toml new file mode 100644 index 0000000000..11c6b4b99d --- /dev/null +++ b/examples/standalone/01_hello_compute/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "wgpu-example-01-hello-compute" +edition = "2021" +rust-version = "1.83" +publish = false + +[dependencies] +bytemuck = "1" +env_logger = "0.11.6" +pollster = "0.4" +wgpu = "24.0.0" diff --git a/examples/standalone/01_hello_compute/cargo-generate.toml b/examples/standalone/01_hello_compute/cargo-generate.toml new file mode 100644 index 0000000000..8b13789179 --- /dev/null +++ b/examples/standalone/01_hello_compute/cargo-generate.toml @@ -0,0 +1 @@ + diff --git a/examples/standalone/01_hello_compute/src/main.rs b/examples/standalone/01_hello_compute/src/main.rs new file mode 100644 index 0000000000..9decdef0df --- /dev/null +++ b/examples/standalone/01_hello_compute/src/main.rs @@ -0,0 +1,255 @@ +/// To serve as an introduction to the wgpu api, we will implement a simple +/// compute shader which takes a list of numbers on the CPU and doubles them on the GPU. +/// +/// While this isn't a very practical example, you will see all the major components +/// of using wgpu headlessly, including getting a device, running a shader, and transferring +/// data between the CPU and GPU. +/// +/// If you time the recording and execution of this example you will certainly see that +/// running on the gpu is slower than doing the same calculation on the cpu. This is because +/// floating point multiplication is a very simple operation so the transfer/submission overhead +/// is quite a lot higher than the actual computation. This is normal and shows that the GPU +/// needs a lot higher work/transfer ratio to come out ahead. +use std::{num::NonZeroU64, str::FromStr}; +use wgpu::util::DeviceExt; + +fn main() { + // Parse all arguments as floats. We need to skip argument 0, which is the name of the program. + let arguments: Vec = std::env::args() + .skip(1) + .map(|s| { + f32::from_str(&s).unwrap_or_else(|_| panic!("Cannot parse argument {s:?} as a float.")) + }) + .collect(); + + if arguments.is_empty() { + println!("No arguments provided. 
Please provide a list of numbers to double."); + return; + } + + println!("Parsed {} arguments", arguments.len()); + + // wgpu uses `log` for all of our logging, so we initialize a logger with the `env_logger` crate. + // + // To change the log level, set the `RUST_LOG` environment variable. See the `env_logger` + // documentation for more information. + env_logger::init(); + + // We first initialize an wgpu `Instance`, which contains any "global" state wgpu needs. + // + // This is what loads the vulkan/dx12/metal/opengl libraries. + let instance = wgpu::Instance::new(&wgpu::InstanceDescriptor::default()); + + // We then create an `Adapter` which represents a physical gpu in the system. It allows + // us to query information about it and create a `Device` from it. + // + // This function is asynchronous in WebGPU, so request_adapter returns a future. On native/webgl + // the future resolves immediately, so we can block on it without harm. + let adapter = + pollster::block_on(instance.request_adapter(&wgpu::RequestAdapterOptions::default())) + .expect("Failed to create adapter"); + + // Print out some basic information about the adapter. + println!("Running on Adapter: {:#?}", adapter.get_info()); + + // Check to see if the adapter supports compute shaders. While WebGPU guarantees support for + // compute shaders, wgpu supports a wider range of devices through the use of "downlevel" devices. + let downlevel_capabilities = adapter.get_downlevel_capabilities(); + if !downlevel_capabilities + .flags + .contains(wgpu::DownlevelFlags::COMPUTE_SHADERS) + { + panic!("Adapter does not support compute shaders"); + } + + // We then create a `Device` and a `Queue` from the `Adapter`. + // + // The `Device` is used to create and manage GPU resources. + // The `Queue` is a queue used to submit work for the GPU to process. + let (device, queue) = pollster::block_on(adapter.request_device( + &wgpu::DeviceDescriptor { + label: None, + required_features: wgpu::Features::empty(), + required_limits: wgpu::Limits::downlevel_defaults(), + memory_hints: wgpu::MemoryHints::MemoryUsage, + }, + None, + )) + .expect("Failed to create device"); + + // Create a shader module from our shader code. This will parse and validate the shader. + // + // `include_wgsl` is a macro provided by wgpu like `include_str` which constructs a ShaderModuleDescriptor. + // If you want to load shaders differently, you can construct the ShaderModuleDescriptor manually. + let module = device.create_shader_module(wgpu::include_wgsl!("shader.wgsl")); + + // Create a buffer with the data we want to process on the GPU. + // + // `create_buffer_init` is a utility provided by `wgpu::util::DeviceExt` which simplifies creating + // a buffer with some initial data. + // + // We use the `bytemuck` crate to cast the slice of f32 to a &[u8] to be uploaded to the GPU. + let input_data_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor { + label: None, + contents: bytemuck::cast_slice(&arguments), + usage: wgpu::BufferUsages::STORAGE, + }); + + // Now we create a buffer to store the output data. + let output_data_buffer = device.create_buffer(&wgpu::BufferDescriptor { + label: None, + size: input_data_buffer.size(), + usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC, + mapped_at_creation: false, + }); + + // Finally we create a buffer which can be read by the CPU. This buffer is how we will read + // the data. 
We need to use a separate buffer because we need to have a usage of `MAP_READ`, + // and that usage can only be used with `COPY_DST`. + let download_buffer = device.create_buffer(&wgpu::BufferDescriptor { + label: None, + size: input_data_buffer.size(), + usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ, + mapped_at_creation: false, + }); + + // A bind group layout describes the types of resources that a bind group can contain. Think + // of this like a C-style header declaration, ensuring both the pipeline and bind group agree + // on the types of resources. + let bind_group_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label: None, + entries: &[ + // Input buffer + wgpu::BindGroupLayoutEntry { + binding: 0, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Storage { read_only: true }, + // This is the size of a single element in the buffer. + min_binding_size: Some(NonZeroU64::new(4).unwrap()), + has_dynamic_offset: false, + }, + count: None, + }, + // Output buffer + wgpu::BindGroupLayoutEntry { + binding: 1, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Storage { read_only: false }, + // This is the size of a single element in the buffer. + min_binding_size: Some(NonZeroU64::new(4).unwrap()), + has_dynamic_offset: false, + }, + count: None, + }, + ], + }); + + // The bind group contains the actual resources to bind to the pipeline. + // + // Even when the buffers are individually dropped, wgpu will keep the bind group and buffers + // alive until the bind group itself is dropped. + let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor { + label: None, + layout: &bind_group_layout, + entries: &[ + wgpu::BindGroupEntry { + binding: 0, + resource: input_data_buffer.as_entire_binding(), + }, + wgpu::BindGroupEntry { + binding: 1, + resource: output_data_buffer.as_entire_binding(), + }, + ], + }); + + // The pipeline layout describes the bind groups that a pipeline expects + let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { + label: None, + bind_group_layouts: &[&bind_group_layout], + push_constant_ranges: &[], + }); + + // The pipeline is the ready-to-go program state for the GPU. It contains the shader modules, + // the interfaces (bind group layouts) and the shader entry point. + let pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor { + label: None, + layout: Some(&pipeline_layout), + module: &module, + entry_point: Some("doubleMe"), + compilation_options: wgpu::PipelineCompilationOptions::default(), + cache: None, + }); + + // The command encoder allows us to record commands that we will later submit to the GPU. + let mut encoder = + device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None }); + + // A compute pass is a single series of compute operations. While we are recording a compute + // pass, we cannot record to the encoder. + let mut compute_pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { + label: None, + timestamp_writes: None, + }); + + // Set the pipeline that we want to use + compute_pass.set_pipeline(&pipeline); + // Set the bind group that we want to use + compute_pass.set_bind_group(0, &bind_group, &[]); + + // Now we dispatch a series of workgroups. Each workgroup is a 3D grid of individual programs. + // + // We defined the workgroup size in the shader as 64x1x1. 
So in order to process all of our + // inputs, we ceiling divide the number of inputs by 64. If the user passes 32 inputs, we will + // dispatch 1 workgroups. If the user passes 65 inputs, we will dispatch 2 workgroups, etc. + let workgroup_count = arguments.len().div_ceil(64); + compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1); + + // Now we drop the compute pass, giving us access to the encoder again. + drop(compute_pass); + + // We add a copy operation to the encoder. This will copy the data from the output buffer on the + // GPU to the download buffer on the CPU. + encoder.copy_buffer_to_buffer( + &output_data_buffer, + 0, + &download_buffer, + 0, + output_data_buffer.size(), + ); + + // We finish the encoder, giving us a fully recorded command buffer. + let command_buffer = encoder.finish(); + + // At this point nothing has actually been executed on the gpu. We have recorded a series of + // commands that we want to execute, but they haven't been sent to the gpu yet. + // + // Submitting to the queue sends the command buffer to the gpu. The gpu will then execute the + // commands in the command buffer in order. + queue.submit([command_buffer]); + + // We now map the download buffer so we can read it. Mapping tells wgpu that we want to read/write + // to the buffer directly by the CPU and it should not permit any more GPU operations on the buffer. + // + // Mapping requires that the GPU be finished using the buffer before it resolves, so mapping has a callback + // to tell you when the mapping is complete. + let buffer_slice = download_buffer.slice(..); + buffer_slice.map_async(wgpu::MapMode::Read, |_| { + // In this case we know exactly when the mapping will be finished, + // so we don't need to do anything in the callback. + }); + + // Wait for the GPU to finish working on the submitted work. This doesn't work on WebGPU, so we would need + // to rely on the callback to know when the buffer is mapped. + device.poll(wgpu::Maintain::Wait); + + // We can now read the data from the buffer. + let data = buffer_slice.get_mapped_range(); + // Convert the data back to a slice of f32. + let result: &[f32] = bytemuck::cast_slice(&data); + + // Print out the result. + println!("Result: {:?}", result); +} diff --git a/examples/standalone/01_hello_compute/src/shader.wgsl b/examples/standalone/01_hello_compute/src/shader.wgsl new file mode 100644 index 0000000000..54285d6da6 --- /dev/null +++ b/examples/standalone/01_hello_compute/src/shader.wgsl @@ -0,0 +1,27 @@ +// Input to the shader. The length of the array is determined by what buffer is bound. +// +// Out of bounds accesses +@group(0) @binding(0) +var input: array; +// Output of the shader. +@group(0) @binding(1) +var output: array; + +// Ideal workgroup size depends on the hardware, the workload, and other factors. However, it should +// _generally_ be a multiple of 64. Common sizes are 64x1x1, 256x1x1; or 8x8x1, 16x16x1 for 2D workloads. +@compute @workgroup_size(64) +fn doubleMe(@builtin(global_invocation_id) global_id: vec3) { + // While compute invocations are 3d, we're only using one dimension. + let index = global_id.x; + + // Because we're using a workgroup size of 64, if the input size isn't a multiple of 64, + // we will have some "extra" invocations. This is fine, but we should tell them to stop + // to avoid out-of-bounds accesses. + let array_length = arrayLength(&input); + if (global_id.x >= array_length) { + return; + } + + // Do the multiply by two and write to the output. 
+ output[global_id.x] = input[global_id.x] * 2.0; +} diff --git a/examples/standalone/02_hello_window/Cargo.toml b/examples/standalone/02_hello_window/Cargo.toml new file mode 100644 index 0000000000..35bcc89364 --- /dev/null +++ b/examples/standalone/02_hello_window/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "wgpu-example-02-hello-window" +edition = "2021" +rust-version = "1.83" +publish = false + +[dependencies] +env_logger = "0.11.6" +pollster = "0.4" +wgpu = "24.0.0" +winit = { version = "0.30.8", features = ["android-native-activity"] } diff --git a/examples/standalone/02_hello_window/src/main.rs b/examples/standalone/02_hello_window/src/main.rs new file mode 100644 index 0000000000..4385aa5a21 --- /dev/null +++ b/examples/standalone/02_hello_window/src/main.rs @@ -0,0 +1,190 @@ +use std::sync::Arc; + +use winit::{ + application::ApplicationHandler, + event::WindowEvent, + event_loop::{ActiveEventLoop, ControlFlow, EventLoop}, + window::{Window, WindowId}, +}; + +struct State { + window: Arc, + device: wgpu::Device, + queue: wgpu::Queue, + size: winit::dpi::PhysicalSize, + surface: wgpu::Surface<'static>, + surface_format: wgpu::TextureFormat, +} + +impl State { + async fn new(window: Arc) -> State { + let instance = wgpu::Instance::new(&wgpu::InstanceDescriptor::default()); + let adapter = instance + .request_adapter(&wgpu::RequestAdapterOptions::default()) + .await + .unwrap(); + let (device, queue) = adapter + .request_device( + &wgpu::DeviceDescriptor::default(), + None, // Trace path + ) + .await + .unwrap(); + + let size = window.inner_size(); + + let surface = instance.create_surface(window.clone()).unwrap(); + let cap = surface.get_capabilities(&adapter); + let surface_format = cap.formats[0]; + + let state = State { + window, + device, + queue, + size, + surface, + surface_format, + }; + + // Configure surface for the first time + state.configure_surface(); + + state + } + + fn get_window(&self) -> &Window { + &self.window + } + + fn configure_surface(&self) { + let surface_config = wgpu::SurfaceConfiguration { + usage: wgpu::TextureUsages::RENDER_ATTACHMENT, + format: self.surface_format, + // Request compatibility with the sRGB-format texture view we‘re going to create later. + view_formats: vec![self.surface_format.add_srgb_suffix()], + alpha_mode: wgpu::CompositeAlphaMode::Auto, + width: self.size.width, + height: self.size.height, + desired_maximum_frame_latency: 2, + present_mode: wgpu::PresentMode::AutoVsync, + }; + self.surface.configure(&self.device, &surface_config); + } + + fn resize(&mut self, new_size: winit::dpi::PhysicalSize) { + self.size = new_size; + + // reconfigure the surface + self.configure_surface(); + } + + fn render(&mut self) { + // Create texture view + let surface_texture = self + .surface + .get_current_texture() + .expect("failed to acquire next swapchain texture"); + let texture_view = surface_texture + .texture + .create_view(&wgpu::TextureViewDescriptor { + // Without add_srgb_suffix() the image we will be working with + // might not be "gamma correct". + format: Some(self.surface_format.add_srgb_suffix()), + ..Default::default() + }); + + // Renders a GREEN screen + let mut encoder = self.device.create_command_encoder(&Default::default()); + // Create the renderpass which will clear the screen. 
+ let renderpass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor { + label: None, + color_attachments: &[Some(wgpu::RenderPassColorAttachment { + view: &texture_view, + resolve_target: None, + ops: wgpu::Operations { + load: wgpu::LoadOp::Clear(wgpu::Color::GREEN), + store: wgpu::StoreOp::Store, + }, + })], + depth_stencil_attachment: None, + timestamp_writes: None, + occlusion_query_set: None, + }); + + // If you wanted to call any drawing commands, they would go here. + + // End the renderpass. + drop(renderpass); + + // Submit the command in the queue to execute + self.queue.submit([encoder.finish()]); + surface_texture.present(); + } +} + +#[derive(Default)] +struct App { + state: Option, +} + +impl ApplicationHandler for App { + fn resumed(&mut self, event_loop: &ActiveEventLoop) { + // Create window object + let window = Arc::new( + event_loop + .create_window(Window::default_attributes()) + .unwrap(), + ); + + let state = pollster::block_on(State::new(window.clone())); + self.state = Some(state); + + window.request_redraw(); + } + + fn window_event(&mut self, event_loop: &ActiveEventLoop, _id: WindowId, event: WindowEvent) { + let state = self.state.as_mut().unwrap(); + match event { + WindowEvent::CloseRequested => { + println!("The close button was pressed; stopping"); + event_loop.exit(); + } + WindowEvent::RedrawRequested => { + state.render(); + // Emits a new redraw requested event. + state.get_window().request_redraw(); + } + WindowEvent::Resized(size) => { + // Reconfigures the size of the surface. We do not re-render + // here as this event is always followed up by redraw request. + state.resize(size); + } + _ => (), + } + } +} + +fn main() { + // wgpu uses `log` for all of our logging, so we initialize a logger with the `env_logger` crate. + // + // To change the log level, set the `RUST_LOG` environment variable. See the `env_logger` + // documentation for more information. + env_logger::init(); + + let event_loop = EventLoop::new().unwrap(); + + // When the current loop iteration finishes, immediately begin a new + // iteration regardless of whether or not new events are available to + // process. Preferred for applications that want to render as fast as + // possible, like games. + event_loop.set_control_flow(ControlFlow::Poll); + + // When the current loop iteration finishes, suspend the thread until + // another event arrives. Helps keeping CPU utilization low if nothing + // is happening, which is preferred if the application might be idling in + // the background. + // event_loop.set_control_flow(ControlFlow::Wait); + + let mut app = App::default(); + event_loop.run_app(&mut app).unwrap(); +} diff --git a/lock-analyzer/Cargo.toml b/lock-analyzer/Cargo.toml index 513e729162..d34aa3530a 100644 --- a/lock-analyzer/Cargo.toml +++ b/lock-analyzer/Cargo.toml @@ -17,3 +17,6 @@ anyhow.workspace = true [dependencies.serde] workspace = true features = ["default", "serde_derive"] + +[lints.clippy] +disallowed_types = "allow" diff --git a/naga-cli/Cargo.toml b/naga-cli/Cargo.toml index 4c45e24a11..2d00c4a06b 100644 --- a/naga-cli/Cargo.toml +++ b/naga-cli/Cargo.toml @@ -8,6 +8,12 @@ repository = "https://github.com/gfx-rs/wgpu/tree/trunk/naga-cli" keywords = ["shader", "SPIR-V", "GLSL", "MSL"] license = "MIT OR Apache-2.0" +# Override the workspace's `rust-version` key. 
Firefox uses `cargo vendor` to +# copy the crates it actually uses out of the workspace, so it's meaningful for +# them to have less restrictive MSRVs individually than the workspace as a +# whole, if their code permits. See `../README.md` for details. +rust-version = "1.76" + [[bin]] name = "naga" path = "src/bin/naga.rs" @@ -18,17 +24,7 @@ doc = false test = false [dependencies] -bincode.workspace = true -codespan-reporting.workspace = true -env_logger.workspace = true -argh.workspace = true -anyhow.workspace = true -log.workspace = true - -[dependencies.naga] -version = "24.0.0" -path = "../naga" -features = [ +naga = { workspace = true, features = [ "compact", "wgsl-in", "wgsl-out", @@ -41,4 +37,11 @@ features = [ "dot-out", "serialize", "deserialize", -] +] } + +bincode.workspace = true +codespan-reporting.workspace = true +env_logger.workspace = true +argh.workspace = true +anyhow = { workspace = true, features = ["std"] } +log.workspace = true diff --git a/naga/Cargo.toml b/naga/Cargo.toml index 2c8d846289..9cd92d1318 100644 --- a/naga/Cargo.toml +++ b/naga/Cargo.toml @@ -41,12 +41,23 @@ msl-out = [] ## If you want to enable MSL output it regardless of the target platform, use `naga/msl-out`. msl-out-if-target-apple = [] -serialize = ["dep:serde", "bitflags/serde", "indexmap/serde"] -deserialize = ["dep:serde", "bitflags/serde", "indexmap/serde"] +serialize = ["dep:serde", "bitflags/serde", "hashbrown/serde", "indexmap/serde"] +deserialize = [ + "dep:serde", + "bitflags/serde", + "hashbrown/serde", + "indexmap/serde", +] arbitrary = ["dep:arbitrary", "bitflags/arbitrary", "indexmap/arbitrary"] spv-in = ["dep:petgraph", "dep:spirv"] spv-out = ["dep:spirv"] -wgsl-in = ["dep:hexf-parse", "dep:unicode-xid", "compact"] +wgsl-in = [ + "dep:hexf-parse", + "dep:strum", + "dep:unicode-xid", + "indexmap/std", + "compact", +] wgsl-out = [] ## Enables outputting to HLSL (Microsoft's High-Level Shader Language). @@ -72,10 +83,11 @@ termcolor = { version = "1.4.1" } # termcolor minimum version was wrong and was fixed in # https://github.com/brendanzab/codespan/commit/e99c867339a877731437e7ee6a903a3d03b5439e codespan-reporting = { version = "0.11.0" } +hashbrown.workspace = true rustc-hash.workspace = true indexmap.workspace = true log = "0.4" -strum.workspace = true +strum = { workspace = true, optional = true } spirv = { version = "0.3", optional = true } thiserror.workspace = true serde = { version = "1.0.217", features = [ @@ -93,11 +105,12 @@ cfg_aliases.workspace = true [dev-dependencies] diff = "0.1" env_logger.workspace = true -itertools.workspace = true +hashbrown = { workspace = true, features = ["serde"] } # This _cannot_ have a version specified. If it does, crates.io will look # for a version of the package on crates when we publish naga. Path dependencies # are allowed through though. hlsl-snapshots = { path = "./hlsl-snapshots" } +itertools.workspace = true # Require at least version 0.7.1 of ron, this version changed how floating points are # serialized by forcing them to always have the decimal part, this makes it backwards # incompatible with our tests because we do a syntactic diff and not a semantic one. 
diff --git a/naga/fuzz/Cargo.toml b/naga/fuzz/Cargo.toml index cd6c62488e..5d8647f19c 100644 --- a/naga/fuzz/Cargo.toml +++ b/naga/fuzz/Cargo.toml @@ -50,3 +50,6 @@ path = "fuzz_targets/ir.rs" bench = false test = false doc = false + +[lints.clippy] +disallowed_types = "allow" diff --git a/naga/fuzz/fuzz_targets/glsl_parser.rs b/naga/fuzz/fuzz_targets/glsl_parser.rs index 97a6ae3fbf..103a844193 100644 --- a/naga/fuzz/fuzz_targets/glsl_parser.rs +++ b/naga/fuzz/fuzz_targets/glsl_parser.rs @@ -2,6 +2,8 @@ #[cfg(enable_fuzzing)] mod fuzz { + use std::iter::FromIterator; + use arbitrary::Arbitrary; use libfuzzer_sys::fuzz_target; use naga::{ @@ -9,34 +11,23 @@ mod fuzz { FastHashMap, ShaderStage, }; - #[derive(Debug, Arbitrary)] - enum ShaderStageProxy { - Vertex, - Fragment, - Compute, - } - - impl From for ShaderStage { - fn from(proxy: ShaderStageProxy) -> Self { - match proxy { - ShaderStageProxy::Vertex => ShaderStage::Vertex, - ShaderStageProxy::Fragment => ShaderStage::Fragment, - ShaderStageProxy::Compute => ShaderStage::Compute, - } - } - } - #[derive(Debug, Arbitrary)] struct OptionsProxy { - pub stage: ShaderStageProxy, - pub defines: FastHashMap, + pub stage: ShaderStage, + pub defines: std::collections::HashMap, } impl From for Options { fn from(proxy: OptionsProxy) -> Self { Options { - stage: proxy.stage.into(), - defines: proxy.defines, + stage: proxy.stage, + // NOTE: This is a workaround needed due to lack of rust-fuzz/arbitrary support for hashbrown. + defines: FastHashMap::from_iter( + proxy + .defines + .keys() + .map(|k| (k.clone(), proxy.defines.get(&k.clone()).unwrap().clone())), + ), } } } diff --git a/naga/src/arena/mod.rs b/naga/src/arena/mod.rs index 0747eaef72..014c5167c6 100644 --- a/naga/src/arena/mod.rs +++ b/naga/src/arena/mod.rs @@ -2,11 +2,10 @@ To improve translator performance and reduce memory usage, most structures are stored in an [`Arena`]. An `Arena` stores a series of `T` values, indexed by -[`Handle`](Handle) values, which are just wrappers around integer indexes. +[`Handle`] values, which are just wrappers around integer indexes. For example, a `Function`'s expressions are stored in an `Arena`, and compound expressions refer to their sub-expressions via `Handle` -values. (When examining the serialized form of a `Module`, note that the first -element of an `Arena` has an index of 1, not 0.) +values. A [`UniqueArena`] is just like an `Arena`, except that it stores only a single instance of each value. The value type must implement `Eq` and `Hash`. Like an @@ -95,7 +94,7 @@ impl Arena { /// Returns an iterator over the items stored in this arena, returning both /// the item's handle and a reference to it. - pub fn iter(&self) -> impl DoubleEndedIterator, &T)> { + pub fn iter(&self) -> impl DoubleEndedIterator, &T)> + ExactSizeIterator { self.data .iter() .enumerate() diff --git a/naga/src/arena/unique_arena.rs b/naga/src/arena/unique_arena.rs index c64bb302eb..9f5e26df11 100644 --- a/naga/src/arena/unique_arena.rs +++ b/naga/src/arena/unique_arena.rs @@ -108,7 +108,7 @@ impl Iterator for UniqueArenaDrain<'_, T> { impl UniqueArena { /// Returns an iterator over the items stored in this arena, returning both /// the item's handle and a reference to it. 
- pub fn iter(&self) -> impl DoubleEndedIterator, &T)> { + pub fn iter(&self) -> impl DoubleEndedIterator, &T)> + ExactSizeIterator { self.set.iter().enumerate().map(|(i, v)| { let index = unsafe { Index::new_unchecked(i as u32) }; (Handle::new(index), v) diff --git a/naga/src/back/glsl/mod.rs b/naga/src/back/glsl/mod.rs index d0954f637a..d2f074f002 100644 --- a/naga/src/back/glsl/mod.rs +++ b/naga/src/back/glsl/mod.rs @@ -51,6 +51,7 @@ use crate::{ valid, Handle, ShaderStage, TypeInner, }; use features::FeaturesManager; +use hashbrown::hash_map; use std::{ cmp::Ordering, fmt::{self, Error as FmtError, Write}, @@ -4609,7 +4610,6 @@ impl<'a, W: Write> Writer<'a, W> { /// Helper method used to produce the reflection info that's returned to the user fn collect_reflection_info(&mut self) -> Result { - use std::collections::hash_map::Entry; let info = self.info.get_entry_point(self.entry_point_idx as usize); let mut texture_mapping = crate::FastHashMap::default(); let mut uniforms = crate::FastHashMap::default(); @@ -4618,13 +4618,13 @@ impl<'a, W: Write> Writer<'a, W> { let tex_name = self.reflection_names_globals[&sampling.image].clone(); match texture_mapping.entry(tex_name) { - Entry::Vacant(v) => { + hash_map::Entry::Vacant(v) => { v.insert(TextureMapping { texture: sampling.image, sampler: Some(sampling.sampler), }); } - Entry::Occupied(e) => { + hash_map::Entry::Occupied(e) => { if e.get().sampler != Some(sampling.sampler) { log::error!("Conflicting samplers for {}", e.key()); return Err(Error::ImageMultipleSamplers); @@ -4642,13 +4642,13 @@ impl<'a, W: Write> Writer<'a, W> { TypeInner::Image { .. } => { let tex_name = self.reflection_names_globals[&handle].clone(); match texture_mapping.entry(tex_name) { - Entry::Vacant(v) => { + hash_map::Entry::Vacant(v) => { v.insert(TextureMapping { texture: handle, sampler: None, }); } - Entry::Occupied(_) => { + hash_map::Entry::Occupied(_) => { // already used with a sampler, do nothing } } diff --git a/naga/src/back/hlsl/conv.rs b/naga/src/back/hlsl/conv.rs index 9573fce2a8..a4c6c1941d 100644 --- a/naga/src/back/hlsl/conv.rs +++ b/naga/src/back/hlsl/conv.rs @@ -127,14 +127,14 @@ impl crate::StorageFormat { Self::R8Sint | Self::R16Sint | Self::R32Sint => "int", Self::R64Uint => "uint64_t", - Self::Rg16Float | Self::Rg32Float => "float2", - Self::Rg8Unorm | Self::Rg16Unorm => "unorm float2", - Self::Rg8Snorm | Self::Rg16Snorm => "snorm float2", + Self::Rg16Float | Self::Rg32Float => "float4", + Self::Rg8Unorm | Self::Rg16Unorm => "unorm float4", + Self::Rg8Snorm | Self::Rg16Snorm => "snorm float4", - Self::Rg8Sint | Self::Rg16Sint | Self::Rg32Uint => "int2", - Self::Rg8Uint | Self::Rg16Uint | Self::Rg32Sint => "uint2", + Self::Rg8Sint | Self::Rg16Sint | Self::Rg32Uint => "int4", + Self::Rg8Uint | Self::Rg16Uint | Self::Rg32Sint => "uint4", - Self::Rg11b10Ufloat => "float3", + Self::Rg11b10Ufloat => "float4", Self::Rgba16Float | Self::Rgba32Float => "float4", Self::Rgba8Unorm | Self::Bgra8Unorm | Self::Rgba16Unorm | Self::Rgb10a2Unorm => { diff --git a/naga/src/back/hlsl/help.rs b/naga/src/back/hlsl/help.rs index f63c9d2cfd..46096aaa68 100644 --- a/naga/src/back/hlsl/help.rs +++ b/naga/src/back/hlsl/help.rs @@ -1190,6 +1190,85 @@ impl super::Writer<'_, W> { Ok(()) } + /// Writes out the sampler heap declarations if they haven't been written yet. 
+ pub(super) fn write_sampler_heaps(&mut self) -> BackendResult { + if self.wrapped.sampler_heaps { + return Ok(()); + } + + writeln!( + self.out, + "SamplerState {}[2048]: register(s{}, space{});", + super::writer::SAMPLER_HEAP_VAR, + self.options.sampler_heap_target.standard_samplers.register, + self.options.sampler_heap_target.standard_samplers.space + )?; + writeln!( + self.out, + "SamplerComparisonState {}[2048]: register(s{}, space{});", + super::writer::COMPARISON_SAMPLER_HEAP_VAR, + self.options + .sampler_heap_target + .comparison_samplers + .register, + self.options.sampler_heap_target.comparison_samplers.space + )?; + + self.wrapped.sampler_heaps = true; + + Ok(()) + } + + /// Writes out the sampler index buffer declaration if it hasn't been written yet. + pub(super) fn write_wrapped_sampler_buffer( + &mut self, + key: super::SamplerIndexBufferKey, + ) -> BackendResult { + // The astute will notice that we do a double hash lookup, but we do this to avoid + // holding a mutable reference to `self` while trying to call `write_sampler_heaps`. + // + // We only pay this double lookup cost when we actually need to write out the sampler + // buffer, which should be not be common. + + if self.wrapped.sampler_index_buffers.contains_key(&key) { + return Ok(()); + }; + + self.write_sampler_heaps()?; + + // Because the group number can be arbitrary, we use the namer to generate a unique name + // instead of adding it to the reserved name list. + let sampler_array_name = self + .namer + .call(&format!("nagaGroup{}SamplerIndexArray", key.group)); + + let bind_target = match self.options.sampler_buffer_binding_map.get(&key) { + Some(&bind_target) => bind_target, + None if self.options.fake_missing_bindings => super::BindTarget { + space: u8::MAX, + register: key.group, + binding_array_size: None, + dynamic_storage_buffer_offsets_index: None, + restrict_indexing: false, + }, + None => { + unreachable!("Sampler buffer of group {key:?} not bound to a register"); + } + }; + + writeln!( + self.out, + "StructuredBuffer {sampler_array_name} : register(t{}, space{});", + bind_target.register, bind_target.space + )?; + + self.wrapped + .sampler_index_buffers + .insert(key, sampler_array_name); + + Ok(()) + } + pub(super) fn write_texture_coordinates( &mut self, kind: &str, diff --git a/naga/src/back/hlsl/keywords.rs b/naga/src/back/hlsl/keywords.rs index c15e17636c..a5a6059a32 100644 --- a/naga/src/back/hlsl/keywords.rs +++ b/naga/src/back/hlsl/keywords.rs @@ -820,6 +820,8 @@ pub const RESERVED: &[&str] = &[ super::writer::FREXP_FUNCTION, super::writer::EXTRACT_BITS_FUNCTION, super::writer::INSERT_BITS_FUNCTION, + super::writer::SAMPLER_HEAP_VAR, + super::writer::COMPARISON_SAMPLER_HEAP_VAR, ]; // DXC scalar types, from https://github.com/microsoft/DirectXShaderCompiler/blob/18c9e114f9c314f93e68fbc72ce207d4ed2e65ae/tools/clang/lib/AST/ASTContextHLSL.cpp#L48-L254 @@ -905,3 +907,5 @@ pub const TYPES: &[&str] = &{ res }; + +pub const RESERVED_PREFIXES: &[&str] = &["__dynamic_buffer_offsets"]; diff --git a/naga/src/back/hlsl/mod.rs b/naga/src/back/hlsl/mod.rs index dcce866bac..48ff883477 100644 --- a/naga/src/back/hlsl/mod.rs +++ b/naga/src/back/hlsl/mod.rs @@ -92,6 +92,15 @@ float3x2 GetMatmOnBaz(Baz obj) { We also emit an analogous `Set` function, as well as functions for accessing individual columns by dynamic index. +## Sampler Handling + +Due to limitations in how sampler heaps work in D3D12, we need to access samplers +through a layer of indirection. 
Instead of directly binding samplers, we bind the entire +sampler heap as both a standard and a comparison sampler heap. We then use a sampler +index buffer for each bind group. This buffer is accessed in the shader to get the actual +sampler index within the heap. See the wgpu_hal dx12 backend documentation for more +information. + [hlsl]: https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl [ilov]: https://gpuweb.github.io/gpuweb/wgsl/#internal-value-layout [16bb]: https://github.com/microsoft/DirectXShaderCompiler/wiki/Buffer-Packing#constant-buffer-packing @@ -110,14 +119,34 @@ use thiserror::Error; use crate::{back, proc}; -#[derive(Clone, Debug, Default, PartialEq, Eq, Hash)] +#[derive(Copy, Clone, Debug, Default, PartialEq, Eq, Hash)] #[cfg_attr(feature = "serialize", derive(serde::Serialize))] #[cfg_attr(feature = "deserialize", derive(serde::Deserialize))] pub struct BindTarget { pub space: u8, + /// For regular bindings this is the register number. + /// + /// For sampler bindings, this is the index to use into the bind group's sampler index buffer. pub register: u32, /// If the binding is an unsized binding array, this overrides the size. pub binding_array_size: Option, + /// This is the index in the buffer at [`Options::dynamic_storage_buffer_offsets_targets`]. + pub dynamic_storage_buffer_offsets_index: Option, + /// This is a hint that we need to restrict indexing of vectors, matrices and arrays. + /// + /// If [`Options::restrict_indexing`] is also `true`, we will restrict indexing. + #[cfg_attr(any(feature = "serialize", feature = "deserialize"), serde(default))] + pub restrict_indexing: bool, +} + +#[derive(Clone, Debug, Default, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "serialize", derive(serde::Serialize))] +#[cfg_attr(feature = "deserialize", derive(serde::Deserialize))] +/// BindTarget for dynamic storage buffer offsets +pub struct OffsetsBindTarget { + pub space: u8, + pub register: u32, + pub size: u32, } // Using `BTreeMap` instead of `HashMap` so that we can hash itself. @@ -179,6 +208,47 @@ impl crate::ImageDimension { } } +#[derive(Clone, Copy, Debug, Hash, Eq, Ord, PartialEq, PartialOrd)] +#[cfg_attr(feature = "serialize", derive(serde::Serialize))] +#[cfg_attr(feature = "deserialize", derive(serde::Deserialize))] +pub struct SamplerIndexBufferKey { + pub group: u32, +} + +#[derive(Clone, Debug, Hash, PartialEq, Eq)] +#[cfg_attr(feature = "serialize", derive(serde::Serialize))] +#[cfg_attr(feature = "deserialize", derive(serde::Deserialize))] +#[cfg_attr(feature = "deserialize", serde(default))] +pub struct SamplerHeapBindTargets { + pub standard_samplers: BindTarget, + pub comparison_samplers: BindTarget, +} + +impl Default for SamplerHeapBindTargets { + fn default() -> Self { + Self { + standard_samplers: BindTarget { + space: 0, + register: 0, + binding_array_size: None, + dynamic_storage_buffer_offsets_index: None, + restrict_indexing: false, + }, + comparison_samplers: BindTarget { + space: 1, + register: 0, + binding_array_size: None, + dynamic_storage_buffer_offsets_index: None, + restrict_indexing: false, + }, + } + } +} + +// We use a BTreeMap here so that we can hash it. 
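To make the indirection described in the new "Sampler Handling" section concrete, the emitted HLSL looks roughly like the following sketch. The register/space numbers are placeholders, the index-buffer element type is assumed to be `uint`, and the variable names shown are illustrative (the per-group array name comes from the namer); the real values are taken from `sampler_heap_target`, `sampler_buffer_binding_map`, and the 2048-entry heap size hard-coded in `write_sampler_heaps`.

```hlsl
// Sketch of the generated declarations (placeholder registers/spaces).
SamplerState nagaSamplerHeap[2048]: register(s0, space0);
SamplerComparisonState nagaComparisonSamplerHeap[2048]: register(s0, space1);

// One index buffer per bind group, mapping binding slots to heap indices.
StructuredBuffer<uint> nagaGroup0SamplerIndexArray : register(t0, space0);

// A non-comparison sampler whose BindTarget register is 3 is then resolved as:
static const SamplerState my_sampler =
    nagaSamplerHeap[nagaGroup0SamplerIndexArray[3]];
```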
+pub type SamplerIndexBufferBindingMap = + std::collections::BTreeMap; + /// Shorthand result used internally by the backend type BackendResult = Result<(), Error>; @@ -207,10 +277,19 @@ pub struct Options { pub special_constants_binding: Option, /// Bind target of the push constant buffer pub push_constants_target: Option, + /// Bind target of the sampler heap and comparison sampler heap. + pub sampler_heap_target: SamplerHeapBindTargets, + /// Mapping of each bind group's sampler index buffer to a bind target. + pub sampler_buffer_binding_map: SamplerIndexBufferBindingMap, + /// Bind target for dynamic storage buffer offsets + pub dynamic_storage_buffer_offsets_targets: std::collections::BTreeMap, /// Should workgroup variables be zero initialized (by polyfilling)? pub zero_initialize_workgroup_memory: bool, /// Should we restrict indexing of vectors, matrices and arrays? pub restrict_indexing: bool, + /// If set, loops will have code injected into them, forcing the compiler + /// to think the number of iterations is bounded. + pub force_loop_bounding: bool, } impl Default for Options { @@ -220,9 +299,13 @@ impl Default for Options { binding_map: BindingMap::default(), fake_missing_bindings: true, special_constants_binding: None, + sampler_heap_target: SamplerHeapBindTargets::default(), + sampler_buffer_binding_map: std::collections::BTreeMap::default(), push_constants_target: None, + dynamic_storage_buffer_offsets_targets: std::collections::BTreeMap::new(), zero_initialize_workgroup_memory: true, restrict_indexing: true, + force_loop_bounding: true, } } } @@ -233,13 +316,15 @@ impl Options { res_binding: &crate::ResourceBinding, ) -> Result { match self.binding_map.get(res_binding) { - Some(target) => Ok(target.clone()), + Some(target) => Ok(*target), None if self.fake_missing_bindings => Ok(BindTarget { space: res_binding.group as u8, register: res_binding.binding, binding_array_size: None, + dynamic_storage_buffer_offsets_index: None, + restrict_indexing: false, }), - None => Err(EntryPointError::MissingBinding(res_binding.clone())), + None => Err(EntryPointError::MissingBinding(*res_binding)), } } } @@ -279,6 +364,10 @@ struct Wrapped { struct_matrix_access: crate::FastHashSet, mat_cx2s: crate::FastHashSet, math: crate::FastHashSet, + /// If true, the sampler heaps have been written out. + sampler_heaps: bool, + // Mapping from SamplerIndexBufferKey to the name the namer returned. 
+ sampler_index_buffers: crate::FastHashMap, } impl Wrapped { diff --git a/naga/src/back/hlsl/ray.rs b/naga/src/back/hlsl/ray.rs index 305a086180..8a50738616 100644 --- a/naga/src/back/hlsl/ray.rs +++ b/naga/src/back/hlsl/ray.rs @@ -47,11 +47,11 @@ impl super::Writer<'_, W> { writeln!(self.out, " ret.t = rq.CommittedRayT();")?; writeln!( self.out, - " ret.instance_custom_index = rq.CommittedInstanceID();" + " ret.instance_custom_data = rq.CommittedInstanceID();" )?; writeln!( self.out, - " ret.instance_id = rq.CommittedInstanceIndex();" + " ret.instance_index = rq.CommittedInstanceIndex();" )?; writeln!( self.out, @@ -139,11 +139,11 @@ impl super::Writer<'_, W> { writeln!( self.out, - " ret.instance_custom_index = rq.CandidateInstanceID();" + " ret.instance_custom_data = rq.CandidateInstanceID();" )?; writeln!( self.out, - " ret.instance_id = rq.CandidateInstanceIndex();" + " ret.instance_index = rq.CandidateInstanceIndex();" )?; writeln!( self.out, diff --git a/naga/src/back/hlsl/storage.rs b/naga/src/back/hlsl/storage.rs index 9fbdf6769a..4576a3ace6 100644 --- a/naga/src/back/hlsl/storage.rs +++ b/naga/src/back/hlsl/storage.rs @@ -76,6 +76,11 @@ const STORE_TEMP_NAME: &str = "_value"; /// [`Storage`]: crate::AddressSpace::Storage #[derive(Debug)] pub(super) enum SubAccess { + BufferOffset { + group: u32, + offset: u32, + }, + /// Add the given byte offset. This is used for struct members, or /// known components of a vector or matrix. In all those cases, /// the byte offset is a compile-time constant. @@ -119,6 +124,9 @@ impl super::Writer<'_, W> { write!(self.out, "+")?; } match *access { + SubAccess::BufferOffset { group, offset } => { + write!(self.out, "__dynamic_buffer_offsets{group}._{offset}")?; + } SubAccess::Offset(offset) => { write!(self.out, "{offset}")?; } @@ -492,7 +500,21 @@ impl super::Writer<'_, W> { loop { let (next_expr, access_index) = match func_ctx.expressions[cur_expr] { - crate::Expression::GlobalVariable(handle) => return Ok(handle), + crate::Expression::GlobalVariable(handle) => { + if let Some(ref binding) = module.global_variables[handle].binding { + // this was already resolved earlier when we started evaluating an entry point. + let bt = self.options.resolve_resource_binding(binding).unwrap(); + if let Some(dynamic_storage_buffer_offsets_index) = + bt.dynamic_storage_buffer_offsets_index + { + self.temp_access_chain.push(SubAccess::BufferOffset { + group: binding.group, + offset: dynamic_storage_buffer_offsets_index, + }); + } + } + return Ok(handle); + } crate::Expression::Access { base, index } => (base, AccessIndex::Expression(index)), crate::Expression::AccessIndex { base, index } => { (base, AccessIndex::Constant(index)) diff --git a/naga/src/back/hlsl/writer.rs b/naga/src/back/hlsl/writer.rs index 9c061b8d15..7e8c322509 100644 --- a/naga/src/back/hlsl/writer.rs +++ b/naga/src/back/hlsl/writer.rs @@ -24,6 +24,8 @@ pub(crate) const MODF_FUNCTION: &str = "naga_modf"; pub(crate) const FREXP_FUNCTION: &str = "naga_frexp"; pub(crate) const EXTRACT_BITS_FUNCTION: &str = "naga_extractBits"; pub(crate) const INSERT_BITS_FUNCTION: &str = "naga_insertBits"; +pub(crate) const SAMPLER_HEAP_VAR: &str = "nagaSamplerHeap"; +pub(crate) const COMPARISON_SAMPLER_HEAP_VAR: &str = "nagaComparisonSamplerHeap"; struct EpStructMember { name: String, @@ -94,6 +96,16 @@ const fn is_subgroup_builtin_binding(binding: &Option) -> bool { ) } +/// Information for how to generate a `binding_array` access. 
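The `dynamic_storage_buffer_offsets_index` / `OffsetsBindTarget` machinery added in `hlsl/mod.rs` pairs with the new `SubAccess::BufferOffset` case in `hlsl/storage.rs`: each group with dynamic storage buffer offsets gets a small constant buffer of `uint` offsets, and the relevant offset is folded into the byte-address computation. A rough sketch of the emitted HLSL, with placeholder register/space values and an illustrative access:

```hlsl
// Sketch only: one offsets struct and constant buffer per bind group.
struct __dynamic_buffer_offsetsTy0 {
    uint _0;
    uint _1;
};
ConstantBuffer<__dynamic_buffer_offsetsTy0> __dynamic_buffer_offsets0: register(b0, space0);

// A dynamically-offset storage buffer access then adds the offset into the
// byte address, e.g. (illustrative):
//   buffer.Load(__dynamic_buffer_offsets0._1 + 16 + index * 4)
```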
+struct BindingArraySamplerInfo { + /// Variable name of the sampler heap + sampler_heap_name: &'static str, + /// Variable name of the sampler index buffer + sampler_index_buffer_name: String, + /// Variable name of the base index _into_ the sampler index buffer + binding_array_base_index_name: String, +} + impl<'a, W: fmt::Write> super::Writer<'a, W> { pub fn new(out: W, options: &'a Options) -> Self { Self { @@ -119,7 +131,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { super::keywords::RESERVED, super::keywords::TYPES, super::keywords::RESERVED_CASE_INSENSITIVE, - &[], + super::keywords::RESERVED_PREFIXES, &mut self.names, ); self.entry_point_io.clear(); @@ -131,6 +143,33 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { self.need_bake_expressions.clear(); } + /// Generates statements to be inserted immediately before and at the very + /// start of the body of each loop, to defeat infinite loop reasoning. + /// The 0th item of the returned tuple should be inserted immediately prior + /// to the loop and the 1st item should be inserted at the very start of + /// the loop body. + /// + /// See [`back::msl::Writer::gen_force_bounded_loop_statements`] for details. + fn gen_force_bounded_loop_statements( + &mut self, + level: back::Level, + ) -> Option<(String, String)> { + if !self.options.force_loop_bounding { + return None; + } + + let loop_bound_name = self.namer.call("loop_bound"); + let decl = format!("{level}uint2 {loop_bound_name} = uint2(0u, 0u);"); + let level = level.next(); + let max = u32::MAX; + let break_and_inc = format!( + "{level}if (all({loop_bound_name} == uint2({max}u, {max}u))) {{ break; }} +{level}{loop_bound_name} += uint2({loop_bound_name}.y == {max}u, 1u);" + ); + + Some((decl, break_and_inc)) + } + /// Helper method used to find which expressions of a given function require baking /// /// # Notes @@ -143,11 +182,11 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { ) { use crate::Expression; self.need_bake_expressions.clear(); - for (fun_handle, expr) in func.expressions.iter() { - let expr_info = &info[fun_handle]; - let min_ref_count = func.expressions[fun_handle].bake_ref_count(); + for (exp_handle, expr) in func.expressions.iter() { + let expr_info = &info[exp_handle]; + let min_ref_count = func.expressions[exp_handle].bake_ref_count(); if min_ref_count <= expr_info.ref_count { - self.need_bake_expressions.insert(fun_handle); + self.need_bake_expressions.insert(exp_handle); } if let Expression::Math { fun, arg, .. } = *expr { @@ -172,7 +211,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { self.need_bake_expressions.insert(arg); } crate::MathFunction::CountLeadingZeros => { - let inner = info[fun_handle].ty.inner_with(&module.types); + let inner = info[exp_handle].ty.inner_with(&module.types); if let Some(ScalarKind::Sint) = inner.scalar_kind() { self.need_bake_expressions.insert(arg); } @@ -187,6 +226,14 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { self.need_bake_expressions.insert(expr); } } + + if let Expression::GlobalVariable(_) = *expr { + let inner = info[exp_handle].ty.inner_with(&module.types); + + if let TypeInner::Sampler { .. 
} = *inner { + self.need_bake_expressions.insert(exp_handle); + } + } } for statement in func.body.iter() { match *statement { @@ -236,6 +283,22 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { writeln!(self.out)?; } + for (group, bt) in self.options.dynamic_storage_buffer_offsets_targets.iter() { + writeln!(self.out, "struct __dynamic_buffer_offsetsTy{} {{", group)?; + for i in 0..bt.size { + writeln!(self.out, "{}uint _{};", back::INDENT, i)?; + } + writeln!(self.out, "}};")?; + writeln!( + self.out, + "ConstantBuffer<__dynamic_buffer_offsetsTy{}> __dynamic_buffer_offsets{}: register(b{}, space{});", + group, group, bt.register, bt.space + )?; + + // Extra newline for readability + writeln!(self.out)?; + } + // Save all entry point output types let ep_results = module .entry_points @@ -814,6 +877,18 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { } } + let handle_ty = match *inner { + TypeInner::BindingArray { ref base, .. } => &module.types[*base].inner, + _ => inner, + }; + + // Samplers are handled entirely differently, so defer entirely to that method. + let is_sampler = matches!(*handle_ty, TypeInner::Sampler { .. }); + + if is_sampler { + return self.write_global_sampler(module, handle, global); + } + // https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-variable-register let register_ty = match global.space { crate::AddressSpace::Function => unreachable!("Function address space"), @@ -843,13 +918,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { register } crate::AddressSpace::Handle => { - let handle_ty = match *inner { - TypeInner::BindingArray { ref base, .. } => &module.types[*base].inner, - _ => inner, - }; - let register = match *handle_ty { - TypeInner::Sampler { .. } => "s", // all storage textures are UAV, unconditionally TypeInner::Image { class: crate::ImageClass::Storage { .. }, @@ -956,6 +1025,66 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { Ok(()) } + fn write_global_sampler( + &mut self, + module: &Module, + handle: Handle, + global: &crate::GlobalVariable, + ) -> BackendResult { + let binding = *global.binding.as_ref().unwrap(); + + let key = super::SamplerIndexBufferKey { + group: binding.group, + }; + self.write_wrapped_sampler_buffer(key)?; + + // This was already validated, so we can confidently unwrap it. + let bt = self.options.resolve_resource_binding(&binding).unwrap(); + + match module.types[global.ty].inner { + TypeInner::Sampler { comparison } => { + // If we are generating a static access, we create a variable for the sampler. + // + // This prevents the DXIL from containing multiple lookups for the sampler, which + // the backend compiler will then have to eliminate. AMD does seem to be able to + // eliminate these, but better safe than sorry. + + write!(self.out, "static const ")?; + self.write_type(module, global.ty)?; + + let heap_var = if comparison { + COMPARISON_SAMPLER_HEAP_VAR + } else { + SAMPLER_HEAP_VAR + }; + + let index_buffer_name = &self.wrapped.sampler_index_buffers[&key]; + let name = &self.names[&NameKey::GlobalVariable(handle)]; + writeln!( + self.out, + " {name} = {heap_var}[{index_buffer_name}[{register}]];", + register = bt.register + )?; + } + TypeInner::BindingArray { .. } => { + // If we are generating a binding array, we cannot directly access the sampler as the index + // into the sampler index buffer is unknown at compile time. Instead we generate a constant + // that represents the "base" index into the sampler index buffer. 
This constant is added + // to the user provided index to get the final index into the sampler index buffer. + + let name = &self.names[&NameKey::GlobalVariable(handle)]; + writeln!( + self.out, + "static const uint {name} = {register};", + register = bt.register + )?; + } + _ => unreachable!(), + }; + + Ok(()) + } + /// Helper method used to write global constants /// /// # Notes @@ -1250,25 +1379,37 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { self.update_expressions_to_bake(module, func, info); - // Write modifier - if let Some(crate::FunctionResult { - binding: - Some( - ref binding @ crate::Binding::BuiltIn(crate::BuiltIn::Position { - invariant: true, - }), - ), - .. - }) = func.result - { - self.write_modifier(binding)?; - } - - // Write return type if let Some(ref result) = func.result { + // Write typedef if return type is an array + let array_return_type = match module.types[result.ty].inner { + TypeInner::Array { base, size, .. } => { + let array_return_type = self.namer.call(&format!("ret_{name}")); + write!(self.out, "typedef ")?; + self.write_type(module, result.ty)?; + write!(self.out, " {}", array_return_type)?; + self.write_array_size(module, base, size)?; + writeln!(self.out, ";")?; + Some(array_return_type) + } + _ => None, + }; + + // Write modifier + if let Some( + ref binding @ crate::Binding::BuiltIn(crate::BuiltIn::Position { invariant: true }), + ) = result.binding + { + self.write_modifier(binding)?; + } + + // Write return type match func_ctx.ty { back::FunctionType::Function(_) => { - self.write_type(module, result.ty)?; + if let Some(array_return_type) = array_return_type { + write!(self.out, "{array_return_type}")?; + } else { + self.write_type(module, result.ty)?; + } } back::FunctionType::EntryPoint(index) => { if let Some(ref ep_output) = self.entry_point_io[index as usize].output { @@ -2048,12 +2189,24 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { ref continuing, break_if, } => { + let force_loop_bound_statements = self.gen_force_bounded_loop_statements(level); + let gate_name = (!continuing.is_empty() || break_if.is_some()) + .then(|| self.namer.call("loop_init")); + + if let Some((ref decl, _)) = force_loop_bound_statements { + writeln!(self.out, "{decl}")?; + } + if let Some(ref gate_name) = gate_name { + writeln!(self.out, "{level}bool {gate_name} = true;")?; + } + self.continue_ctx.enter_loop(); + writeln!(self.out, "{level}while(true) {{")?; + if let Some((_, ref break_and_inc)) = force_loop_bound_statements { + writeln!(self.out, "{break_and_inc}")?; + } let l2 = level.next(); - if !continuing.is_empty() || break_if.is_some() { - let gate_name = self.namer.call("loop_init"); - writeln!(self.out, "{level}bool {gate_name} = true;")?; - writeln!(self.out, "{level}while(true) {{")?; + if let Some(gate_name) = gate_name { writeln!(self.out, "{l2}if (!{gate_name}) {{")?; let l3 = l2.next(); for sta in continuing.iter() { @@ -2068,13 +2221,12 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { } writeln!(self.out, "{l2}}}")?; writeln!(self.out, "{l2}{gate_name} = false;")?; - } else { - writeln!(self.out, "{level}while(true) {{")?; } for sta in body.iter() { self.write_stmt(module, sta, func_ctx, l2)?; } + writeln!(self.out, "{level}}}")?; self.continue_ctx.exit_loop(); } @@ -2126,13 +2278,21 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { write!(self.out, "const ")?; let name = Baked(expr).to_string(); let expr_ty = &func_ctx.info[expr].ty; - match *expr_ty { - proc::TypeResolution::Handle(handle) => self.write_type(module, handle)?, + let 
ty_inner = match *expr_ty { + proc::TypeResolution::Handle(handle) => { + self.write_type(module, handle)?; + &module.types[handle].inner + } proc::TypeResolution::Value(ref value) => { - self.write_value_type(module, value)? + self.write_value_type(module, value)?; + value } }; - write!(self.out, " {name} = ")?; + write!(self.out, " {name}")?; + if let TypeInner::Array { base, size, .. } = *ty_inner { + self.write_array_size(module, base, size)?; + } + write!(self.out, " = ")?; self.named_expressions.insert(expr, name); } let func_name = &self.names[&NameKey::Function(function)]; @@ -2670,7 +2830,16 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { }; self.write_expr(module, base, func_ctx)?; - write!(self.out, "[")?; + + let array_sampler_info = self.sampler_binding_array_info_from_expression( + module, func_ctx, base, resolved, + ); + + if let Some(ref info) = array_sampler_info { + write!(self.out, "{}[", info.sampler_heap_name)?; + } else { + write!(self.out, "[")?; + } let needs_bound_check = self.options.restrict_indexing && !indexing_binding_array @@ -2682,7 +2851,20 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { | crate::AddressSpace::PushConstant, ) | None => true, - Some(crate::AddressSpace::Uniform) => false, // TODO: needs checks for dynamic uniform buffers, see https://github.com/gfx-rs/wgpu/issues/4483 + Some(crate::AddressSpace::Uniform) => { + // check if BindTarget.restrict_indexing is set, this is used for dynamic buffers + let var_handle = self.fill_access_chain(module, base, func_ctx)?; + let bind_target = self + .options + .resolve_resource_binding( + module.global_variables[var_handle] + .binding + .as_ref() + .unwrap(), + ) + .unwrap(); + bind_target.restrict_indexing + } Some( crate::AddressSpace::Handle | crate::AddressSpace::Storage { .. }, ) => unreachable!(), @@ -2715,7 +2897,17 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { if non_uniform_qualifier { write!(self.out, "NonUniformResourceIndex(")?; } + if let Some(ref info) = array_sampler_info { + write!( + self.out, + "{}[{} + ", + info.sampler_index_buffer_name, info.binding_array_base_index_name, + )?; + } self.write_expr(module, index, func_ctx)?; + if array_sampler_info.is_some() { + write!(self.out, "]")?; + } if non_uniform_qualifier { write!(self.out, ")")?; } @@ -2730,43 +2922,6 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { { // do nothing, the chain is written on `Load`/`Store` } else { - fn write_access( - writer: &mut super::Writer<'_, W>, - resolved: &TypeInner, - base_ty_handle: Option>, - index: u32, - ) -> BackendResult { - match *resolved { - // We specifically lift the ValuePointer to this case. While `[0]` is valid - // HLSL for any vector behind a value pointer, FXC completely miscompiles - // it and generates completely nonsensical DXBC. - // - // See https://github.com/gfx-rs/naga/issues/2095 for more details. - TypeInner::Vector { .. } | TypeInner::ValuePointer { .. } => { - // Write vector access as a swizzle - write!(writer.out, ".{}", back::COMPONENTS[index as usize])? - } - TypeInner::Matrix { .. } - | TypeInner::Array { .. } - | TypeInner::BindingArray { .. } => write!(writer.out, "[{index}]")?, - TypeInner::Struct { .. } => { - // This will never panic in case the type is a `Struct`, this is not true - // for other types so we can only check while inside this match arm - let ty = base_ty_handle.unwrap(); - - write!( - writer.out, - ".{}", - &writer.names[&NameKey::StructMember(ty, index)] - )? 
- } - ref other => { - return Err(Error::Custom(format!("Cannot index {other:?}"))) - } - } - Ok(()) - } - // We write the matrix column access in a special way since // the type of `base` is our special __matCx2 struct. if let Some(MatrixType { @@ -2816,8 +2971,60 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { } } + let array_sampler_info = self.sampler_binding_array_info_from_expression( + module, func_ctx, base, resolved, + ); + + if let Some(ref info) = array_sampler_info { + write!( + self.out, + "{}[{}", + info.sampler_heap_name, info.sampler_index_buffer_name + )?; + } + self.write_expr(module, base, func_ctx)?; - write_access(self, resolved, base_ty_handle, index)?; + + match *resolved { + // We specifically lift the ValuePointer to this case. While `[0]` is valid + // HLSL for any vector behind a value pointer, FXC completely miscompiles + // it and generates completely nonsensical DXBC. + // + // See https://github.com/gfx-rs/naga/issues/2095 for more details. + TypeInner::Vector { .. } | TypeInner::ValuePointer { .. } => { + // Write vector access as a swizzle + write!(self.out, ".{}", back::COMPONENTS[index as usize])? + } + TypeInner::Matrix { .. } + | TypeInner::Array { .. } + | TypeInner::BindingArray { .. } => { + if let Some(ref info) = array_sampler_info { + write!( + self.out, + "[{} + {index}]", + info.binding_array_base_index_name + )?; + } else { + write!(self.out, "[{index}]")?; + } + } + TypeInner::Struct { .. } => { + // This will never panic in case the type is a `Struct`, this is not true + // for other types so we can only check while inside this match arm + let ty = base_ty_handle.unwrap(); + + write!( + self.out, + ".{}", + &self.names[&NameKey::StructMember(ty, index)] + )? + } + ref other => return Err(Error::Custom(format!("Cannot index {other:?}"))), + } + + if array_sampler_info.is_some() { + write!(self.out, "]")?; + } } } Expression::FunctionArgument(pos) => { @@ -2958,13 +3165,30 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { write!(self.out, ".x")?; } } - Expression::GlobalVariable(handle) => match module.global_variables[handle].space { - crate::AddressSpace::Storage { .. } => {} - _ => { + Expression::GlobalVariable(handle) => { + let global_variable = &module.global_variables[handle]; + let ty = &module.types[global_variable.ty].inner; + + // In the case of binding arrays of samplers, we need to not write anything + // as the we are in the wrong position to fully write the expression. + // + // The entire writing is done by AccessIndex. + let is_binding_array_of_samplers = match *ty { + TypeInner::BindingArray { base, .. } => { + let base_ty = &module.types[base].inner; + matches!(*base_ty, TypeInner::Sampler { .. }) + } + _ => false, + }; + + let is_storage_space = + matches!(global_variable.space, crate::AddressSpace::Storage { .. }); + + if !is_binding_array_of_samplers && !is_storage_space { let name = &self.names[&NameKey::GlobalVariable(handle)]; write!(self.out, "{name}")?; } - }, + } Expression::LocalVariable(handle) => { write!(self.out, "{}", self.names[&func_ctx.name_key(handle)])? } @@ -3682,6 +3906,52 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { Ok(()) } + /// Find the [`BindingArraySamplerInfo`] from an expression so that such an access + /// can be generated later. + fn sampler_binding_array_info_from_expression( + &mut self, + module: &Module, + func_ctx: &back::FunctionCtx<'_>, + base: Handle, + resolved: &TypeInner, + ) -> Option { + if let TypeInner::BindingArray { + base: base_ty_handle, + .. 
+ } = *resolved + { + let base_ty = &module.types[base_ty_handle].inner; + if let TypeInner::Sampler { comparison, .. } = *base_ty { + let base = &func_ctx.expressions[base]; + + if let crate::Expression::GlobalVariable(handle) = *base { + let variable = &module.global_variables[handle]; + + let sampler_heap_name = match comparison { + true => COMPARISON_SAMPLER_HEAP_VAR, + false => SAMPLER_HEAP_VAR, + }; + + return Some(BindingArraySamplerInfo { + sampler_heap_name, + sampler_index_buffer_name: self + .wrapped + .sampler_index_buffers + .get(&super::SamplerIndexBufferKey { + group: variable.binding.unwrap().group, + }) + .unwrap() + .clone(), + binding_array_base_index_name: self.names[&NameKey::GlobalVariable(handle)] + .clone(), + }); + } + } + } + + None + } + fn write_named_expr( &mut self, module: &Module, diff --git a/naga/src/back/mod.rs b/naga/src/back/mod.rs index 58c7fa02cb..e839853008 100644 --- a/naga/src/back/mod.rs +++ b/naga/src/back/mod.rs @@ -55,7 +55,7 @@ impl std::fmt::Display for Baked { /// the key must be the constant's identifier name. /// /// The value may represent any of WGSL's concrete scalar types. -pub type PipelineConstants = std::collections::HashMap; +pub type PipelineConstants = hashbrown::HashMap; /// Indentation level. #[derive(Clone, Copy)] diff --git a/naga/src/back/msl/mod.rs b/naga/src/back/msl/mod.rs index 28e99acc5f..be94b26742 100644 --- a/naga/src/back/msl/mod.rs +++ b/naga/src/back/msl/mod.rs @@ -148,6 +148,8 @@ pub enum Error { UnsupportedRayTracing, #[error("overrides should not be present at this stage")] Override, + #[error("bitcasting to {0:?} is not supported")] + UnsupportedBitCast(crate::TypeInner), } #[derive(Clone, Debug, PartialEq, thiserror::Error)] @@ -496,7 +498,7 @@ impl Options { index: 0, interpolation: None, }), - None => Err(EntryPointError::MissingBindTarget(res_binding.clone())), + None => Err(EntryPointError::MissingBindTarget(*res_binding)), } } diff --git a/naga/src/back/msl/writer.rs b/naga/src/back/msl/writer.rs index 6b999b32e7..8913996b04 100644 --- a/naga/src/back/msl/writer.rs +++ b/naga/src/back/msl/writer.rs @@ -389,11 +389,6 @@ pub struct Writer { /// Set of (struct type, struct field index) denoting which fields require /// padding inserted **before** them (i.e. between fields at index - 1 and index) struct_member_pads: FastHashSet<(Handle, u32)>, - - /// Name of the force-bounded-loop macro. - /// - /// See `emit_force_bounded_loop_macro` for details. - force_bounded_loop_macro_name: String, } impl crate::Scalar { @@ -607,7 +602,7 @@ struct ExpressionContext<'a> { /// accesses. These may need to be cached in temporary variables. See /// `index::find_checked_indexes` for details. guarded_indices: HandleSet, - /// See [`Writer::emit_force_bounded_loop_macro`] for details. + /// See [`Writer::gen_force_bounded_loop_statements`] for details. force_loop_bounding: bool, } @@ -691,7 +686,6 @@ impl Writer { #[cfg(test)] put_block_stack_pointers: Default::default(), struct_member_pads: FastHashSet::default(), - force_bounded_loop_macro_name: String::default(), } } @@ -702,17 +696,11 @@ impl Writer { self.out } - /// Define a macro to invoke at the bottom of each loop body, to - /// defeat MSL infinite loop reasoning. - /// - /// If we haven't done so already, emit the definition of a preprocessor - /// macro to be invoked at the end of each loop body in the generated MSL, - /// to ensure that the MSL compiler's optimizations do not remove bounds - /// checks. 
- /// - /// Only the first call to this function for a given module actually causes - /// the macro definition to be written. Subsequent loops can simply use the - /// prior macro definition, since macros aren't block-scoped. + /// Generates statements to be inserted immediately before and at the very + /// start of the body of each loop, to defeat MSL infinite loop reasoning. + /// The 0th item of the returned tuple should be inserted immediately prior + /// to the loop and the 1st item should be inserted at the very start of + /// the loop body. /// /// # What is this trying to solve? /// @@ -780,7 +768,8 @@ impl Writer { /// but which in fact generates no instructions. Unfortunately, inline /// assembly is not handled correctly by some Metal device drivers. /// - /// Instead, we add the following code to the bottom of every loop: + /// A previously used approach was to add the following code to the bottom + /// of every loop: /// /// ```ignore /// if (volatile bool unpredictable = false; unpredictable) @@ -791,37 +780,47 @@ impl Writer { /// the `volatile` qualifier prevents the compiler from assuming this. Thus, /// it must assume that the `break` might be reached, and hence that the /// loop is not unbounded. This prevents the range analysis impact described - /// above. + /// above. Unfortunately this prevented the compiler from making important, + /// and safe, optimizations such as loop unrolling and was observed to + /// significantly hurt performance. /// - /// Unfortunately, what makes this a kludge, not a hack, is that this - /// solution leaves the GPU executing a pointless conditional branch, at - /// runtime, in every iteration of the loop. There's no part of the system - /// that has a global enough view to be sure that `unpredictable` is true, - /// and remove it from the code. Adding the branch also affects - /// optimization: for example, it's impossible to unroll this loop. This - /// transformation has been observed to significantly hurt performance. + /// Our current approach declares a counter before every loop and + /// increments it every iteration, breaking after 2^64 iterations: /// - /// To make our output a bit more legible, we pull the condition out into a - /// preprocessor macro defined at the top of the module. + /// ```ignore + /// uint2 loop_bound = uint2(0); + /// while (true) { + /// if (metal::all(loop_bound == uint2(4294967295))) { break; } + /// loop_bound += uint2(loop_bound.y == 4294967295, 1); + /// } + /// ``` + /// + /// This convinces the compiler that the loop is finite and therefore may + /// execute, whilst at the same time allowing optimizations such as loop + /// unrolling. Furthermore the 64-bit counter is large enough it seems + /// implausible that it would affect the execution of any shader. 
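Putting the pieces together, a WGSL `loop` with a `continuing` block now lowers to MSL of roughly the following shape; the identifiers are namer-generated and this is only a sketch assembled from `gen_force_bounded_loop_statements` and the `Statement::Loop` handling later in this file (the HLSL backend's `gen_force_bounded_loop_statements` emits the analogous `uint2` counter).

```msl
uint2 loop_bound = uint2(0u);
bool loop_init = true;
while(true) {
    if (metal::all(loop_bound == uint2(4294967295u))) { break; }
    loop_bound += uint2(loop_bound.y == 4294967295u, 1u);
    if (!loop_init) {
        // ... continuing block, plus the break_if check ...
    }
    loop_init = false;
    // ... loop body ...
}
```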
/// /// This approach is also used by Chromium WebGPU's Dawn shader compiler: - /// - fn emit_force_bounded_loop_macro(&mut self) -> BackendResult { - if !self.force_bounded_loop_macro_name.is_empty() { - return Ok(()); + /// + fn gen_force_bounded_loop_statements( + &mut self, + level: back::Level, + context: &StatementContext, + ) -> Option<(String, String)> { + if !context.expression.force_loop_bounding { + return None; } - self.force_bounded_loop_macro_name = self.namer.call("LOOP_IS_BOUNDED"); - let loop_bounded_volatile_name = self.namer.call("unpredictable_break_from_loop"); - writeln!( - self.out, - "#define {} {{ volatile bool {} = false; if ({}) break; }}", - self.force_bounded_loop_macro_name, - loop_bounded_volatile_name, - loop_bounded_volatile_name, - )?; + let loop_bound_name = self.namer.call("loop_bound"); + let decl = format!("{level}uint2 {loop_bound_name} = uint2(0u);"); + let level = level.next(); + let max = u32::MAX; + let break_and_inc = format!( + "{level}if ({NAMESPACE}::all({loop_bound_name} == uint2({max}u))) {{ break; }} +{level}{loop_bound_name} += uint2({loop_bound_name}.y == {max}u, 1u);" + ); - Ok(()) + Some((decl, break_and_inc)) } fn put_call_parameters( @@ -1363,7 +1362,7 @@ impl Writer { Ok(()) } - /// Emit code for the sign(i32) expression. + /// Emit code for the isign expression. /// fn put_isign( &mut self, @@ -1371,18 +1370,23 @@ impl Writer { context: &ExpressionContext, ) -> BackendResult { write!(self.out, "{NAMESPACE}::select({NAMESPACE}::select(")?; + let scalar = context + .resolve_type(arg) + .scalar() + .expect("put_isign should only be called for args which have an integer scalar type") + .to_msl_name(); match context.resolve_type(arg) { &crate::TypeInner::Vector { size, .. } => { let size = back::vector_size_str(size); - write!(self.out, "int{size}(-1), int{size}(1)")?; + write!(self.out, "{scalar}{size}(-1), {scalar}{size}(1)")?; } _ => { - write!(self.out, "-1, 1")?; + write!(self.out, "{scalar}(-1), {scalar}(1)")?; } } write!(self.out, ", (")?; self.put_expression(arg, context, true)?; - write!(self.out, " > 0)), 0, (")?; + write!(self.out, " > 0)), {scalar}(0), (")?; self.put_expression(arg, context, true)?; write!(self.out, " == 0))")?; Ok(()) @@ -1611,7 +1615,12 @@ impl Writer { vector, pattern, } => { - self.put_wrapped_expression_for_packed_vec3_access(vector, context, false)?; + self.put_wrapped_expression_for_packed_vec3_access( + vector, + context, + false, + &Self::put_expression, + )?; write!(self.out, ".")?; for &sc in pattern[..size as usize].iter() { write!(self.out, "{}", back::COMPONENTS[sc as usize])?; @@ -1754,7 +1763,6 @@ impl Writer { write!(self.out, ")")?; } crate::Expression::Binary { op, left, right } => { - let op_str = back::binary_operation_str(op); let kind = context .resolve_type(left) .scalar_kind() @@ -1779,38 +1787,56 @@ impl Writer { write!(self.out, ", ")?; self.put_expression(right, context, true)?; write!(self.out, ")")?; - } else { - if !is_scoped { - write!(self.out, "(")?; - } - - // Cast packed vector if necessary - // Packed vector - matrix multiplications are not supported in MSL - if op == crate::BinaryOperator::Multiply - && matches!( - context.resolve_type(right), - &crate::TypeInner::Matrix { .. 
} - ) - { - self.put_wrapped_expression_for_packed_vec3_access(left, context, false)?; - } else { - self.put_expression(left, context, false)?; - } - - write!(self.out, " {op_str} ")?; - - // See comment above - if op == crate::BinaryOperator::Multiply - && matches!(context.resolve_type(left), &crate::TypeInner::Matrix { .. }) - { - self.put_wrapped_expression_for_packed_vec3_access(right, context, false)?; - } else { - self.put_expression(right, context, false)?; - } + } else if (op == crate::BinaryOperator::Add + || op == crate::BinaryOperator::Subtract + || op == crate::BinaryOperator::Multiply) + && kind == crate::ScalarKind::Sint + { + let to_unsigned = |ty: &crate::TypeInner| match *ty { + crate::TypeInner::Scalar(scalar) => { + Ok(crate::TypeInner::Scalar(crate::Scalar { + kind: crate::ScalarKind::Uint, + ..scalar + })) + } + crate::TypeInner::Vector { size, scalar } => Ok(crate::TypeInner::Vector { + size, + scalar: crate::Scalar { + kind: crate::ScalarKind::Uint, + ..scalar + }, + }), + _ => Err(Error::UnsupportedBitCast(ty.clone())), + }; - if !is_scoped { - write!(self.out, ")")?; - } + // Avoid undefined behaviour due to overflowing signed + // integer arithmetic. Cast the operands to unsigned prior + // to performing the operation, then cast the result back + // to signed. + self.put_bitcasted_expression( + context.resolve_type(expr_handle), + context, + &|writer, context, is_scoped| { + writer.put_binop( + op, + left, + right, + context, + is_scoped, + &|writer, expr, context, _is_scoped| { + writer.put_bitcasted_expression( + &to_unsigned(context.resolve_type(expr))?, + context, + &|writer, context, is_scoped| { + writer.put_expression(expr, context, is_scoped) + }, + ) + }, + ) + }, + )?; + } else { + self.put_binop(op, left, right, context, is_scoped, &Self::put_expression)?; } } crate::Expression::Select { @@ -2334,23 +2360,114 @@ impl Writer { Ok(()) } + /// Emits code for a binary operation, using the provided callback to emit + /// the left and right operands. + fn put_binop( + &mut self, + op: crate::BinaryOperator, + left: Handle, + right: Handle, + context: &ExpressionContext, + is_scoped: bool, + put_expression: &F, + ) -> BackendResult + where + F: Fn(&mut Self, Handle, &ExpressionContext, bool) -> BackendResult, + { + let op_str = back::binary_operation_str(op); + + if !is_scoped { + write!(self.out, "(")?; + } + + // Cast packed vector if necessary + // Packed vector - matrix multiplications are not supported in MSL + if op == crate::BinaryOperator::Multiply + && matches!( + context.resolve_type(right), + &crate::TypeInner::Matrix { .. } + ) + { + self.put_wrapped_expression_for_packed_vec3_access( + left, + context, + false, + put_expression, + )?; + } else { + put_expression(self, left, context, false)?; + } + + write!(self.out, " {op_str} ")?; + + // See comment above + if op == crate::BinaryOperator::Multiply + && matches!(context.resolve_type(left), &crate::TypeInner::Matrix { .. 
}) + { + self.put_wrapped_expression_for_packed_vec3_access( + right, + context, + false, + put_expression, + )?; + } else { + put_expression(self, right, context, false)?; + } + + if !is_scoped { + write!(self.out, ")")?; + } + + Ok(()) + } + /// Used by expressions like Swizzle and Binary since they need packed_vec3's to be casted to a vec3 - fn put_wrapped_expression_for_packed_vec3_access( + fn put_wrapped_expression_for_packed_vec3_access( &mut self, expr_handle: Handle, context: &ExpressionContext, is_scoped: bool, - ) -> BackendResult { + put_expression: &F, + ) -> BackendResult + where + F: Fn(&mut Self, Handle, &ExpressionContext, bool) -> BackendResult, + { if let Some(scalar) = context.get_packed_vec_kind(expr_handle) { write!(self.out, "{}::{}3(", NAMESPACE, scalar.to_msl_name())?; - self.put_expression(expr_handle, context, is_scoped)?; + put_expression(self, expr_handle, context, is_scoped)?; write!(self.out, ")")?; } else { - self.put_expression(expr_handle, context, is_scoped)?; + put_expression(self, expr_handle, context, is_scoped)?; } Ok(()) } + /// Emits code for an expression using the provided callback, wrapping the + /// result in a bitcast to the type `cast_to`. + fn put_bitcasted_expression( + &mut self, + cast_to: &crate::TypeInner, + context: &ExpressionContext, + put_expression: &F, + ) -> BackendResult + where + F: Fn(&mut Self, &ExpressionContext, bool) -> BackendResult, + { + write!(self.out, "as_type<")?; + match *cast_to { + crate::TypeInner::Scalar(scalar) => put_numeric_type(&mut self.out, scalar, &[])?, + crate::TypeInner::Vector { size, scalar } => { + put_numeric_type(&mut self.out, scalar, &[size])? + } + _ => return Err(Error::UnsupportedBitCast(cast_to.clone())), + }; + write!(self.out, ">(")?; + put_expression(self, context, true)?; + write!(self.out, ")")?; + + Ok(()) + } + /// Write a `GuardedIndex` as a Metal expression. 
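The new `put_bitcasted_expression` path above exists so that signed `+`, `-`, and `*` are performed in unsigned arithmetic, where wraparound is well defined, and the result is then bitcast back to the signed type. As a rough illustration (exact spacing and parenthesization aside), an `i32` addition `a + b` comes out as:

```msl
// Signed overflow is UB, so the operands are reinterpreted as uint,
// added with defined wrapping, and the result reinterpreted as int.
as_type<int>(as_type<uint>(a) + as_type<uint>(b))
```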
fn put_index( &mut self, @@ -3092,10 +3209,23 @@ impl Writer { ref continuing, break_if, } => { - if !continuing.is_empty() || break_if.is_some() { - let gate_name = self.namer.call("loop_init"); + let force_loop_bound_statements = + self.gen_force_bounded_loop_statements(level, context); + let gate_name = (!continuing.is_empty() || break_if.is_some()) + .then(|| self.namer.call("loop_init")); + + if let Some((ref decl, _)) = force_loop_bound_statements { + writeln!(self.out, "{decl}")?; + } + if let Some(ref gate_name) = gate_name { writeln!(self.out, "{level}bool {gate_name} = true;")?; - writeln!(self.out, "{level}while(true) {{",)?; + } + + writeln!(self.out, "{level}while(true) {{",)?; + if let Some((_, ref break_and_inc)) = force_loop_bound_statements { + writeln!(self.out, "{break_and_inc}")?; + } + if let Some(ref gate_name) = gate_name { let lif = level.next(); let lcontinuing = lif.next(); writeln!(self.out, "{lif}if (!{gate_name}) {{")?; @@ -3109,19 +3239,9 @@ impl Writer { } writeln!(self.out, "{lif}}}")?; writeln!(self.out, "{lif}{gate_name} = false;")?; - } else { - writeln!(self.out, "{level}while(true) {{",)?; } self.put_block(level.next(), body, context)?; - if context.expression.force_loop_bounding { - self.emit_force_bounded_loop_macro()?; - writeln!( - self.out, - "{}{}", - level.next(), - self.force_bounded_loop_macro_name - )?; - } + writeln!(self.out, "{level}}}")?; } crate::Statement::Break => { @@ -3615,7 +3735,6 @@ impl Writer { &[CLAMPED_LOD_LOAD_PREFIX], &mut self.names, ); - self.force_bounded_loop_macro_name.clear(); self.struct_member_pads.clear(); writeln!( @@ -5214,8 +5333,7 @@ template None => false, }; if !good { - ep_error = - Some(super::EntryPointError::MissingBindTarget(br.clone())); + ep_error = Some(super::EntryPointError::MissingBindTarget(*br)); break; } } diff --git a/naga/src/back/pipeline_constants.rs b/naga/src/back/pipeline_constants.rs index e89964c49c..f1d18fbe8d 100644 --- a/naga/src/back/pipeline_constants.rs +++ b/naga/src/back/pipeline_constants.rs @@ -6,7 +6,8 @@ use crate::{ Arena, Block, Constant, Expression, Function, Handle, Literal, Module, Override, Range, Scalar, Span, Statement, TypeInner, WithSpan, }; -use std::{borrow::Cow, collections::HashSet, mem}; +use hashbrown::HashSet; +use std::{borrow::Cow, mem}; use thiserror::Error; #[derive(Error, Debug, Clone)] @@ -304,7 +305,7 @@ fn process_workgroup_size_override( /// /// Add the new `Constant` to `override_map` and `adjusted_constant_initializers`. fn process_override( - (old_h, override_, span): (Handle, Override, Span), + (old_h, r#override, span): (Handle, Override, Span), pipeline_constants: &PipelineConstants, module: &mut Module, override_map: &mut HandleVec>, @@ -312,20 +313,20 @@ fn process_override( adjusted_constant_initializers: &mut HashSet>, global_expression_kind_tracker: &mut crate::proc::ExpressionKindTracker, ) -> Result, PipelineConstantError> { - // Determine which key to use for `override_` in `pipeline_constants`. - let key = if let Some(id) = override_.id { + // Determine which key to use for `r#override` in `pipeline_constants`. 
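For reference, `PipelineConstants` (now a `hashbrown` map) is keyed the same way the key selection in `process_override` works: the override's `id` rendered as a string if present, otherwise its name. A hypothetical usage sketch, assuming the map's value type is `f64` (wide enough to carry any of WGSL's concrete scalar values), for a shader containing `override gain: f32 = 1.0;`:

```rust
use naga::back::PipelineConstants;

// Hypothetical: override the default value of `gain` before compaction.
let mut constants = PipelineConstants::default();
constants.insert("gain".to_string(), 2.0);
```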
+ let key = if let Some(id) = r#override.id { Cow::Owned(id.to_string()) - } else if let Some(ref name) = override_.name { + } else if let Some(ref name) = r#override.name { Cow::Borrowed(name) } else { unreachable!(); }; - // Generate a global expression for `override_`'s value, either + // Generate a global expression for `r#override`'s value, either // from the provided `pipeline_constants` table or its initializer // in the module. let init = if let Some(value) = pipeline_constants.get::(&key) { - let literal = match module.types[override_.ty].inner { + let literal = match module.types[r#override.ty].inner { TypeInner::Scalar(scalar) => map_value_to_literal(*value, scalar)?, _ => unreachable!(), }; @@ -334,7 +335,7 @@ fn process_override( .append(Expression::Literal(literal), Span::UNDEFINED); global_expression_kind_tracker.insert(expr, crate::proc::ExpressionKind::Const); expr - } else if let Some(init) = override_.init { + } else if let Some(init) = r#override.init { adjusted_global_expressions[init] } else { return Err(PipelineConstantError::MissingValue(key.to_string())); @@ -342,8 +343,8 @@ fn process_override( // Generate a new `Constant` to represent the override's value. let constant = Constant { - name: override_.name, - ty: override_.ty, + name: r#override.name, + ty: r#override.ty, init, }; let h = module.constants.append(constant, span); diff --git a/naga/src/back/spv/block.rs b/naga/src/back/spv/block.rs index 80baa22a44..b4cceb8b48 100644 --- a/naga/src/back/spv/block.rs +++ b/naga/src/back/spv/block.rs @@ -1735,7 +1735,20 @@ impl BlockContext<'_> { } crate::Expression::ArrayLength(expr) => self.write_runtime_array_length(expr, block)?, crate::Expression::RayQueryGetIntersection { query, committed } => { - self.write_ray_query_get_intersection(query, block, committed) + let query_id = self.cached[query]; + let func_id = self + .writer + .write_ray_query_get_intersection_function(committed, self.ir_module); + let ray_intersection = self.ir_module.special_types.ray_intersection.unwrap(); + let intersection_type_id = self.get_type_id(LookupType::Handle(ray_intersection)); + let id = self.gen_id(); + block.body.push(Instruction::function_call( + intersection_type_id, + id, + func_id, + &[query_id], + )); + id } crate::Expression::RayQueryVertexPositions { query, committed } => { self.write_ray_query_return_vertex_position(query, block, committed) diff --git a/naga/src/back/spv/mod.rs b/naga/src/back/spv/mod.rs index 4abed48362..518d240ed6 100644 --- a/naga/src/back/spv/mod.rs +++ b/naga/src/back/spv/mod.rs @@ -772,6 +772,8 @@ pub struct Writer { // Just a temporary list of SPIR-V ids temp_list: Vec, + + ray_get_intersection_function: Option, } bitflags::bitflags! { diff --git a/naga/src/back/spv/ray.rs b/naga/src/back/spv/ray.rs index 6c41f509b0..5fb4acd8a2 100644 --- a/naga/src/back/spv/ray.rs +++ b/naga/src/back/spv/ray.rs @@ -2,8 +2,527 @@ Generating SPIR-V for ray query operations. 
*/ -use super::{Block, BlockContext, Instruction, LocalType, LookupType, NumericType}; +use super::{ + Block, BlockContext, Function, FunctionArgument, Instruction, LocalType, LookupFunctionType, + LookupType, NumericType, Writer, +}; use crate::arena::Handle; +use crate::{Type, TypeInner}; + +impl Writer { + pub(super) fn write_ray_query_get_intersection_function( + &mut self, + is_committed: bool, + ir_module: &crate::Module, + ) -> spirv::Word { + if let Some(func_id) = self.ray_get_intersection_function { + return func_id; + } + let ray_intersection = ir_module.special_types.ray_intersection.unwrap(); + let intersection_type_id = self.get_type_id(LookupType::Handle(ray_intersection)); + let intersection_pointer_type_id = + self.get_type_id(LookupType::Local(LocalType::Pointer { + base: ray_intersection, + class: spirv::StorageClass::Function, + })); + + let flag_type_id = self.get_type_id(LookupType::Local(LocalType::Numeric( + NumericType::Scalar(crate::Scalar::U32), + ))); + let flag_type = ir_module + .types + .get(&Type { + name: None, + inner: TypeInner::Scalar(crate::Scalar::U32), + }) + .unwrap(); + let flag_pointer_type_id = self.get_type_id(LookupType::Local(LocalType::Pointer { + base: flag_type, + class: spirv::StorageClass::Function, + })); + + let transform_type_id = + self.get_type_id(LookupType::Local(LocalType::Numeric(NumericType::Matrix { + columns: crate::VectorSize::Quad, + rows: crate::VectorSize::Tri, + scalar: crate::Scalar::F32, + }))); + let transform_type = ir_module + .types + .get(&Type { + name: None, + inner: TypeInner::Matrix { + columns: crate::VectorSize::Quad, + rows: crate::VectorSize::Tri, + scalar: crate::Scalar::F32, + }, + }) + .unwrap(); + let transform_pointer_type_id = self.get_type_id(LookupType::Local(LocalType::Pointer { + base: transform_type, + class: spirv::StorageClass::Function, + })); + + let barycentrics_type_id = + self.get_type_id(LookupType::Local(LocalType::Numeric(NumericType::Vector { + size: crate::VectorSize::Bi, + scalar: crate::Scalar::F32, + }))); + let barycentrics_type = ir_module + .types + .get(&Type { + name: None, + inner: TypeInner::Vector { + size: crate::VectorSize::Bi, + scalar: crate::Scalar::F32, + }, + }) + .unwrap(); + let barycentrics_pointer_type_id = + self.get_type_id(LookupType::Local(LocalType::Pointer { + base: barycentrics_type, + class: spirv::StorageClass::Function, + })); + + let bool_type_id = self.get_type_id(LookupType::Local(LocalType::Numeric( + NumericType::Scalar(crate::Scalar::BOOL), + ))); + let bool_type = ir_module + .types + .get(&Type { + name: None, + inner: TypeInner::Scalar(crate::Scalar::BOOL), + }) + .unwrap(); + let bool_pointer_type_id = self.get_type_id(LookupType::Local(LocalType::Pointer { + base: bool_type, + class: spirv::StorageClass::Function, + })); + + let scalar_type_id = self.get_type_id(LookupType::Local(LocalType::Numeric( + NumericType::Scalar(crate::Scalar::F32), + ))); + let float_type = ir_module + .types + .get(&Type { + name: None, + inner: TypeInner::Scalar(crate::Scalar::F32), + }) + .unwrap(); + let float_pointer_type_id = self.get_type_id(LookupType::Local(LocalType::Pointer { + base: float_type, + class: spirv::StorageClass::Function, + })); + + let rq_ty = ir_module + .types + .get(&Type { + name: None, + inner: TypeInner::RayQuery { vertex_return: false }, + }) + .or_else(|| { + ir_module + .types + .get(&Type { + name: None, + inner: TypeInner::RayQuery { vertex_return: true }, + }) + }) + .expect("ray_query type should have been populated by the 
variable passed into this!"); + let argument_type_id = self.get_type_id(LookupType::Local(LocalType::Pointer { + base: rq_ty, + class: spirv::StorageClass::Function, + })); + let func_ty = self.get_function_type(LookupFunctionType { + parameter_type_ids: vec![argument_type_id], + return_type_id: intersection_type_id, + }); + + let mut function = Function::default(); + let func_id = self.id_gen.next(); + function.signature = Some(Instruction::function( + intersection_type_id, + func_id, + spirv::FunctionControl::empty(), + func_ty, + )); + let blank_intersection = self.get_constant_null(intersection_type_id); + let query_id = self.id_gen.next(); + let instruction = Instruction::function_parameter(argument_type_id, query_id); + function.parameters.push(FunctionArgument { + instruction, + handle_id: 0, + }); + + let label_id = self.id_gen.next(); + let mut block = Block::new(label_id); + + let blank_intersection_id = self.id_gen.next(); + block.body.push(Instruction::variable( + intersection_pointer_type_id, + blank_intersection_id, + spirv::StorageClass::Function, + Some(blank_intersection), + )); + + let intersection_id = self.get_constant_scalar(crate::Literal::U32(if is_committed { + spirv::RayQueryIntersection::RayQueryCommittedIntersectionKHR + } else { + spirv::RayQueryIntersection::RayQueryCandidateIntersectionKHR + } as _)); + let raw_kind_id = self.id_gen.next(); + block.body.push(Instruction::ray_query_get_intersection( + spirv::Op::RayQueryGetIntersectionTypeKHR, + flag_type_id, + raw_kind_id, + query_id, + intersection_id, + )); + let kind_id = if is_committed { + // Nothing to do: the IR value matches `spirv::RayQueryCommittedIntersectionType` + raw_kind_id + } else { + // Remap from the candidate kind to IR + let condition_id = self.id_gen.next(); + let committed_triangle_kind_id = self.get_constant_scalar(crate::Literal::U32( + spirv::RayQueryCandidateIntersectionType::RayQueryCandidateIntersectionTriangleKHR + as _, + )); + block.body.push(Instruction::binary( + spirv::Op::IEqual, + self.get_bool_type_id(), + condition_id, + raw_kind_id, + committed_triangle_kind_id, + )); + let kind_id = self.id_gen.next(); + block.body.push(Instruction::select( + flag_type_id, + kind_id, + condition_id, + self.get_constant_scalar(crate::Literal::U32( + crate::RayQueryIntersection::Triangle as _, + )), + self.get_constant_scalar(crate::Literal::U32( + crate::RayQueryIntersection::Aabb as _, + )), + )); + kind_id + }; + let idx_id = self.get_index_constant(0); + let access_idx = self.id_gen.next(); + block.body.push(Instruction::access_chain( + flag_pointer_type_id, + access_idx, + blank_intersection_id, + &[idx_id], + )); + block + .body + .push(Instruction::store(access_idx, kind_id, None)); + + let not_none_comp_id = self.id_gen.next(); + let none_id = + self.get_constant_scalar(crate::Literal::U32(crate::RayQueryIntersection::None as _)); + block.body.push(Instruction::binary( + spirv::Op::INotEqual, + self.get_bool_type_id(), + not_none_comp_id, + kind_id, + none_id, + )); + + let not_none_label_id = self.id_gen.next(); + let mut not_none_block = Block::new(not_none_label_id); + + let final_label_id = self.id_gen.next(); + let mut final_block = Block::new(final_label_id); + + block.body.push(Instruction::selection_merge( + final_label_id, + spirv::SelectionControl::NONE, + )); + function.consume( + block, + Instruction::branch_conditional(not_none_comp_id, not_none_label_id, final_label_id), + ); + + let instance_custom_index_id = self.id_gen.next(); + not_none_block + .body + 
.push(Instruction::ray_query_get_intersection( + spirv::Op::RayQueryGetIntersectionInstanceCustomIndexKHR, + flag_type_id, + instance_custom_index_id, + query_id, + intersection_id, + )); + let instance_id = self.id_gen.next(); + not_none_block + .body + .push(Instruction::ray_query_get_intersection( + spirv::Op::RayQueryGetIntersectionInstanceIdKHR, + flag_type_id, + instance_id, + query_id, + intersection_id, + )); + let sbt_record_offset_id = self.id_gen.next(); + not_none_block + .body + .push(Instruction::ray_query_get_intersection( + spirv::Op::RayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR, + flag_type_id, + sbt_record_offset_id, + query_id, + intersection_id, + )); + let geometry_index_id = self.id_gen.next(); + not_none_block + .body + .push(Instruction::ray_query_get_intersection( + spirv::Op::RayQueryGetIntersectionGeometryIndexKHR, + flag_type_id, + geometry_index_id, + query_id, + intersection_id, + )); + let primitive_index_id = self.id_gen.next(); + not_none_block + .body + .push(Instruction::ray_query_get_intersection( + spirv::Op::RayQueryGetIntersectionPrimitiveIndexKHR, + flag_type_id, + primitive_index_id, + query_id, + intersection_id, + )); + + //Note: there is also `OpRayQueryGetIntersectionCandidateAABBOpaqueKHR`, + // but it's not a property of an intersection. + + let object_to_world_id = self.id_gen.next(); + not_none_block + .body + .push(Instruction::ray_query_get_intersection( + spirv::Op::RayQueryGetIntersectionObjectToWorldKHR, + transform_type_id, + object_to_world_id, + query_id, + intersection_id, + )); + let world_to_object_id = self.id_gen.next(); + not_none_block + .body + .push(Instruction::ray_query_get_intersection( + spirv::Op::RayQueryGetIntersectionWorldToObjectKHR, + transform_type_id, + world_to_object_id, + query_id, + intersection_id, + )); + + // instance custom index + let idx_id = self.get_index_constant(2); + let access_idx = self.id_gen.next(); + not_none_block.body.push(Instruction::access_chain( + flag_pointer_type_id, + access_idx, + blank_intersection_id, + &[idx_id], + )); + not_none_block.body.push(Instruction::store( + access_idx, + instance_custom_index_id, + None, + )); + + // instance + let idx_id = self.get_index_constant(3); + let access_idx = self.id_gen.next(); + not_none_block.body.push(Instruction::access_chain( + flag_pointer_type_id, + access_idx, + blank_intersection_id, + &[idx_id], + )); + not_none_block + .body + .push(Instruction::store(access_idx, instance_id, None)); + + let idx_id = self.get_index_constant(4); + let access_idx = self.id_gen.next(); + not_none_block.body.push(Instruction::access_chain( + flag_pointer_type_id, + access_idx, + blank_intersection_id, + &[idx_id], + )); + not_none_block + .body + .push(Instruction::store(access_idx, sbt_record_offset_id, None)); + + let idx_id = self.get_index_constant(5); + let access_idx = self.id_gen.next(); + not_none_block.body.push(Instruction::access_chain( + flag_pointer_type_id, + access_idx, + blank_intersection_id, + &[idx_id], + )); + not_none_block + .body + .push(Instruction::store(access_idx, geometry_index_id, None)); + + let idx_id = self.get_index_constant(6); + let access_idx = self.id_gen.next(); + not_none_block.body.push(Instruction::access_chain( + flag_pointer_type_id, + access_idx, + blank_intersection_id, + &[idx_id], + )); + not_none_block + .body + .push(Instruction::store(access_idx, primitive_index_id, None)); + + let idx_id = self.get_index_constant(9); + let access_idx = self.id_gen.next(); + 
not_none_block.body.push(Instruction::access_chain( + transform_pointer_type_id, + access_idx, + blank_intersection_id, + &[idx_id], + )); + not_none_block + .body + .push(Instruction::store(access_idx, object_to_world_id, None)); + + let idx_id = self.get_index_constant(10); + let access_idx = self.id_gen.next(); + not_none_block.body.push(Instruction::access_chain( + transform_pointer_type_id, + access_idx, + blank_intersection_id, + &[idx_id], + )); + not_none_block + .body + .push(Instruction::store(access_idx, world_to_object_id, None)); + + let tri_comp_id = self.id_gen.next(); + let tri_id = self.get_constant_scalar(crate::Literal::U32( + crate::RayQueryIntersection::Triangle as _, + )); + not_none_block.body.push(Instruction::binary( + spirv::Op::IEqual, + self.get_bool_type_id(), + tri_comp_id, + kind_id, + tri_id, + )); + + let tri_label_id = self.id_gen.next(); + let mut tri_block = Block::new(tri_label_id); + + let merge_label_id = self.id_gen.next(); + let merge_block = Block::new(merge_label_id); + // t + { + let block = if is_committed { + &mut not_none_block + } else { + &mut tri_block + }; + let t_id = self.id_gen.next(); + block.body.push(Instruction::ray_query_get_intersection( + spirv::Op::RayQueryGetIntersectionTKHR, + scalar_type_id, + t_id, + query_id, + intersection_id, + )); + let idx_id = self.get_index_constant(1); + let access_idx = self.id_gen.next(); + block.body.push(Instruction::access_chain( + float_pointer_type_id, + access_idx, + blank_intersection_id, + &[idx_id], + )); + block.body.push(Instruction::store(access_idx, t_id, None)); + } + not_none_block.body.push(Instruction::selection_merge( + merge_label_id, + spirv::SelectionControl::NONE, + )); + function.consume( + not_none_block, + Instruction::branch_conditional(not_none_comp_id, tri_label_id, merge_label_id), + ); + + let barycentrics_id = self.id_gen.next(); + tri_block.body.push(Instruction::ray_query_get_intersection( + spirv::Op::RayQueryGetIntersectionBarycentricsKHR, + barycentrics_type_id, + barycentrics_id, + query_id, + intersection_id, + )); + + let front_face_id = self.id_gen.next(); + tri_block.body.push(Instruction::ray_query_get_intersection( + spirv::Op::RayQueryGetIntersectionFrontFaceKHR, + bool_type_id, + front_face_id, + query_id, + intersection_id, + )); + + let idx_id = self.get_index_constant(7); + let access_idx = self.id_gen.next(); + tri_block.body.push(Instruction::access_chain( + barycentrics_pointer_type_id, + access_idx, + blank_intersection_id, + &[idx_id], + )); + tri_block + .body + .push(Instruction::store(access_idx, barycentrics_id, None)); + + let idx_id = self.get_index_constant(8); + let access_idx = self.id_gen.next(); + tri_block.body.push(Instruction::access_chain( + bool_pointer_type_id, + access_idx, + blank_intersection_id, + &[idx_id], + )); + tri_block + .body + .push(Instruction::store(access_idx, front_face_id, None)); + function.consume(tri_block, Instruction::branch(merge_label_id)); + function.consume(merge_block, Instruction::branch(final_label_id)); + + let loaded_blank_intersection_id = self.id_gen.next(); + final_block.body.push(Instruction::load( + intersection_type_id, + loaded_blank_intersection_id, + blank_intersection_id, + None, + )); + function.consume( + final_block, + Instruction::return_value(loaded_blank_intersection_id), + ); + + function.to_words(&mut self.logical_layout.function_definitions); + Instruction::function_end().to_words(&mut self.logical_layout.function_definitions); + self.ray_get_intersection_function = 
Some(func_id); + func_id + } +} impl BlockContext<'_> { pub(super) fn write_ray_query_function( @@ -102,193 +621,6 @@ impl BlockContext<'_> { } } - pub(super) fn write_ray_query_get_intersection( - &mut self, - query: Handle, - block: &mut Block, - is_committed: bool, - ) -> spirv::Word { - let query_id = self.cached[query]; - let intersection_id = - self.writer - .get_constant_scalar(crate::Literal::U32(if is_committed { - spirv::RayQueryIntersection::RayQueryCommittedIntersectionKHR - } else { - spirv::RayQueryIntersection::RayQueryCandidateIntersectionKHR - } as _)); - - let flag_type_id = self.get_type_id(LookupType::Local(LocalType::Numeric( - NumericType::Scalar(crate::Scalar::U32), - ))); - let raw_kind_id = self.gen_id(); - block.body.push(Instruction::ray_query_get_intersection( - spirv::Op::RayQueryGetIntersectionTypeKHR, - flag_type_id, - raw_kind_id, - query_id, - intersection_id, - )); - let kind_id = if is_committed { - // Nothing to do: the IR value matches `spirv::RayQueryCommittedIntersectionType` - raw_kind_id - } else { - // Remap from the candidate kind to IR - let condition_id = self.gen_id(); - let committed_triangle_kind_id = self.writer.get_constant_scalar(crate::Literal::U32( - spirv::RayQueryCandidateIntersectionType::RayQueryCandidateIntersectionTriangleKHR - as _, - )); - block.body.push(Instruction::binary( - spirv::Op::IEqual, - self.writer.get_bool_type_id(), - condition_id, - raw_kind_id, - committed_triangle_kind_id, - )); - let kind_id = self.gen_id(); - block.body.push(Instruction::select( - flag_type_id, - kind_id, - condition_id, - self.writer.get_constant_scalar(crate::Literal::U32( - crate::RayQueryIntersection::Triangle as _, - )), - self.writer.get_constant_scalar(crate::Literal::U32( - crate::RayQueryIntersection::Aabb as _, - )), - )); - kind_id - }; - - let instance_custom_index_id = self.gen_id(); - block.body.push(Instruction::ray_query_get_intersection( - spirv::Op::RayQueryGetIntersectionInstanceCustomIndexKHR, - flag_type_id, - instance_custom_index_id, - query_id, - intersection_id, - )); - let instance_id = self.gen_id(); - block.body.push(Instruction::ray_query_get_intersection( - spirv::Op::RayQueryGetIntersectionInstanceIdKHR, - flag_type_id, - instance_id, - query_id, - intersection_id, - )); - let sbt_record_offset_id = self.gen_id(); - block.body.push(Instruction::ray_query_get_intersection( - spirv::Op::RayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR, - flag_type_id, - sbt_record_offset_id, - query_id, - intersection_id, - )); - let geometry_index_id = self.gen_id(); - block.body.push(Instruction::ray_query_get_intersection( - spirv::Op::RayQueryGetIntersectionGeometryIndexKHR, - flag_type_id, - geometry_index_id, - query_id, - intersection_id, - )); - let primitive_index_id = self.gen_id(); - block.body.push(Instruction::ray_query_get_intersection( - spirv::Op::RayQueryGetIntersectionPrimitiveIndexKHR, - flag_type_id, - primitive_index_id, - query_id, - intersection_id, - )); - - let scalar_type_id = self.get_type_id(LookupType::Local(LocalType::Numeric( - NumericType::Scalar(crate::Scalar::F32), - ))); - let t_id = self.gen_id(); - block.body.push(Instruction::ray_query_get_intersection( - spirv::Op::RayQueryGetIntersectionTKHR, - scalar_type_id, - t_id, - query_id, - intersection_id, - )); - - let barycentrics_type_id = - self.get_type_id(LookupType::Local(LocalType::Numeric(NumericType::Vector { - size: crate::VectorSize::Bi, - scalar: crate::Scalar::F32, - }))); - let barycentrics_id = self.gen_id(); - 
block.body.push(Instruction::ray_query_get_intersection( - spirv::Op::RayQueryGetIntersectionBarycentricsKHR, - barycentrics_type_id, - barycentrics_id, - query_id, - intersection_id, - )); - - let bool_type_id = self.get_type_id(LookupType::Local(LocalType::Numeric( - NumericType::Scalar(crate::Scalar::BOOL), - ))); - let front_face_id = self.gen_id(); - block.body.push(Instruction::ray_query_get_intersection( - spirv::Op::RayQueryGetIntersectionFrontFaceKHR, - bool_type_id, - front_face_id, - query_id, - intersection_id, - )); - //Note: there is also `OpRayQueryGetIntersectionCandidateAABBOpaqueKHR`, - // but it's not a property of an intersection. - - let transform_type_id = - self.get_type_id(LookupType::Local(LocalType::Numeric(NumericType::Matrix { - columns: crate::VectorSize::Quad, - rows: crate::VectorSize::Tri, - scalar: crate::Scalar::F32, - }))); - let object_to_world_id = self.gen_id(); - block.body.push(Instruction::ray_query_get_intersection( - spirv::Op::RayQueryGetIntersectionObjectToWorldKHR, - transform_type_id, - object_to_world_id, - query_id, - intersection_id, - )); - let world_to_object_id = self.gen_id(); - block.body.push(Instruction::ray_query_get_intersection( - spirv::Op::RayQueryGetIntersectionWorldToObjectKHR, - transform_type_id, - world_to_object_id, - query_id, - intersection_id, - )); - - let id = self.gen_id(); - let intersection_type_id = self.get_type_id(LookupType::Handle( - self.ir_module.special_types.ray_intersection.unwrap(), - )); - //Note: the arguments must match `generate_ray_intersection_type` layout - block.body.push(Instruction::composite_construct( - intersection_type_id, - id, - &[ - kind_id, - t_id, - instance_custom_index_id, - instance_id, - sbt_record_offset_id, - geometry_index_id, - primitive_index_id, - barycentrics_id, - front_face_id, - object_to_world_id, - world_to_object_id, - ], - )); - id - } - pub(super) fn write_ray_query_return_vertex_position( &mut self, query: Handle, diff --git a/naga/src/back/spv/recyclable.rs b/naga/src/back/spv/recyclable.rs index 7e7ad5d817..8ccc7406e2 100644 --- a/naga/src/back/spv/recyclable.rs +++ b/naga/src/back/spv/recyclable.rs @@ -38,14 +38,14 @@ impl Recyclable for Vec { } } -impl Recyclable for std::collections::HashMap { +impl Recyclable for hashbrown::HashMap { fn recycle(mut self) -> Self { self.clear(); self } } -impl Recyclable for std::collections::HashSet { +impl Recyclable for hashbrown::HashSet { fn recycle(mut self) -> Self { self.clear(); self diff --git a/naga/src/back/spv/writer.rs b/naga/src/back/spv/writer.rs index 837350a312..b68d9af2f3 100644 --- a/naga/src/back/spv/writer.rs +++ b/naga/src/back/spv/writer.rs @@ -12,8 +12,8 @@ use crate::{ proc::{Alignment, TypeResolution}, valid::{FunctionInfo, ModuleInfo}, }; +use hashbrown::hash_map::Entry; use spirv::Word; -use std::collections::hash_map::Entry; struct FunctionInterface<'a> { varying_ids: &'a mut Vec, @@ -21,7 +21,7 @@ struct FunctionInterface<'a> { } impl Function { - fn to_words(&self, sink: &mut impl Extend) { + pub(super) fn to_words(&self, sink: &mut impl Extend) { self.signature.as_ref().unwrap().to_words(sink); for argument in self.parameters.iter() { argument.instruction.to_words(sink); @@ -81,6 +81,7 @@ impl Writer { saved_cached: CachedExpressions::default(), gl450_ext_inst_id, temp_list: Vec::new(), + ray_get_intersection_function: None, }) } @@ -131,6 +132,7 @@ impl Writer { global_variables: take(&mut self.global_variables).recycle(), saved_cached: take(&mut self.saved_cached).recycle(), temp_list: 
take(&mut self.temp_list).recycle(), + ray_get_intersection_function: None, }; *self = fresh; @@ -165,7 +167,11 @@ impl Writer { let selected = match self.capabilities_available { None => first, Some(ref available) => { - match capabilities.iter().find(|cap| available.contains(cap)) { + match capabilities + .iter() + // need explicit type for hashbrown::HashSet::contains fn call to keep rustc happy + .find(|cap| available.contains::(cap)) + { Some(&cap) => cap, None => { return Err(Error::MissingCapabilities(what, capabilities.to_vec())) @@ -1862,7 +1868,7 @@ impl Writer { Ok(()) } - fn get_function_type(&mut self, lookup_function_type: LookupFunctionType) -> Word { + pub(super) fn get_function_type(&mut self, lookup_function_type: LookupFunctionType) -> Word { match self .lookup_function_type .entry(lookup_function_type.clone()) diff --git a/naga/src/compact/expressions.rs b/naga/src/compact/expressions.rs index b4196816d2..34809297c3 100644 --- a/naga/src/compact/expressions.rs +++ b/naga/src/compact/expressions.rs @@ -3,6 +3,7 @@ use crate::arena::{Arena, Handle}; pub struct ExpressionTracer<'tracer> { pub constants: &'tracer Arena, + pub overrides: &'tracer Arena, /// The arena in which we are currently tracing expressions. pub expressions: &'tracer Arena, @@ -13,6 +14,9 @@ pub struct ExpressionTracer<'tracer> { /// The used map for `constants`. pub constants_used: &'tracer mut HandleSet, + /// The used map for `overrides`. + pub overrides_used: &'tracer mut HandleSet, + /// The used set for `arena`. /// /// This points to whatever arena holds the expressions we are @@ -62,174 +66,184 @@ impl ExpressionTracer<'_> { } log::trace!("tracing new expression {:?}", expr); + self.trace_expression(expr); + } + } - use crate::Expression as Ex; - match *expr { - // Expressions that do not contain handles that need to be traced. - Ex::Literal(_) - | Ex::FunctionArgument(_) - | Ex::GlobalVariable(_) - | Ex::LocalVariable(_) - | Ex::CallResult(_) - | Ex::SubgroupBallotResult - | Ex::RayQueryProceedResult => {} + pub fn trace_expression(&mut self, expr: &crate::Expression) { + use crate::Expression as Ex; + match *expr { + // Expressions that do not contain handles that need to be traced. + Ex::Literal(_) + | Ex::FunctionArgument(_) + | Ex::GlobalVariable(_) + | Ex::LocalVariable(_) + | Ex::CallResult(_) + | Ex::SubgroupBallotResult + | Ex::RayQueryProceedResult => {} - Ex::Constant(handle) => { - self.constants_used.insert(handle); - // Constants and expressions are mutually recursive, which - // complicates our nice one-pass algorithm. However, since - // constants don't refer to each other, we can get around - // this by looking *through* each constant and marking its - // initializer as used. Since `expr` refers to the constant, - // and the constant refers to the initializer, it must - // precede `expr` in the arena. - let init = self.constants[handle].init; + // Expressions can refer to constants and overrides, which can refer + // in turn to expressions, which complicates our nice one-pass + // algorithm. But since constants and overrides don't refer to each + // other directly, only via expressions, we can get around this by + // looking *through* each constant/override and marking its + // initializer expression as used immediately. Since `expr` refers + // to the constant/override, which then refers to the initializer, + // the initializer must precede `expr` in the arena, so we know we + // have yet to visit the initializer, so it's not too late to mark + // it. 
+ Ex::Constant(handle) => { + self.constants_used.insert(handle); + let init = self.constants[handle].init; + match self.global_expressions_used { + Some(ref mut used) => used.insert(init), + None => self.expressions_used.insert(init), + }; + } + Ex::Override(handle) => { + self.overrides_used.insert(handle); + if let Some(init) = self.overrides[handle].init { match self.global_expressions_used { Some(ref mut used) => used.insert(init), None => self.expressions_used.insert(init), }; } - Ex::Override(_) => { - // All overrides are considered used by definition. We mark - // their types and initialization expressions as used in - // `compact::compact`, so we have no more work to do here. - } - Ex::ZeroValue(ty) => { - self.types_used.insert(ty); - } - Ex::Compose { ty, ref components } => { - self.types_used.insert(ty); - self.expressions_used - .insert_iter(components.iter().cloned()); - } - Ex::Access { base, index } => self.expressions_used.insert_iter([base, index]), - Ex::AccessIndex { base, index: _ } => { - self.expressions_used.insert(base); - } - Ex::Splat { size: _, value } => { - self.expressions_used.insert(value); - } - Ex::Swizzle { - size: _, - vector, - pattern: _, - } => { - self.expressions_used.insert(vector); - } - Ex::Load { pointer } => { - self.expressions_used.insert(pointer); - } - Ex::ImageSample { - image, - sampler, - gather: _, - coordinate, - array_index, - offset, - ref level, - depth_ref, - } => { - self.expressions_used - .insert_iter([image, sampler, coordinate]); - self.expressions_used.insert_iter(array_index); - match self.global_expressions_used { - Some(ref mut used) => used.insert_iter(offset), - None => self.expressions_used.insert_iter(offset), - } - use crate::SampleLevel as Sl; - match *level { - Sl::Auto | Sl::Zero => {} - Sl::Exact(expr) | Sl::Bias(expr) => { - self.expressions_used.insert(expr); - } - Sl::Gradient { x, y } => self.expressions_used.insert_iter([x, y]), - } - self.expressions_used.insert_iter(depth_ref); - } - Ex::ImageLoad { - image, - coordinate, - array_index, - sample, - level, - } => { - self.expressions_used.insert(image); - self.expressions_used.insert(coordinate); - self.expressions_used.insert_iter(array_index); - self.expressions_used.insert_iter(sample); - self.expressions_used.insert_iter(level); + } + Ex::ZeroValue(ty) => { + self.types_used.insert(ty); + } + Ex::Compose { ty, ref components } => { + self.types_used.insert(ty); + self.expressions_used + .insert_iter(components.iter().cloned()); + } + Ex::Access { base, index } => self.expressions_used.insert_iter([base, index]), + Ex::AccessIndex { base, index: _ } => { + self.expressions_used.insert(base); + } + Ex::Splat { size: _, value } => { + self.expressions_used.insert(value); + } + Ex::Swizzle { + size: _, + vector, + pattern: _, + } => { + self.expressions_used.insert(vector); + } + Ex::Load { pointer } => { + self.expressions_used.insert(pointer); + } + Ex::ImageSample { + image, + sampler, + gather: _, + coordinate, + array_index, + offset, + ref level, + depth_ref, + } => { + self.expressions_used + .insert_iter([image, sampler, coordinate]); + self.expressions_used.insert_iter(array_index); + match self.global_expressions_used { + Some(ref mut used) => used.insert_iter(offset), + None => self.expressions_used.insert_iter(offset), } - Ex::ImageQuery { image, ref query } => { - self.expressions_used.insert(image); - use crate::ImageQuery as Iq; - match *query { - Iq::Size { level } => self.expressions_used.insert_iter(level), - Iq::NumLevels | 
Iq::NumLayers | Iq::NumSamples => {} + use crate::SampleLevel as Sl; + match *level { + Sl::Auto | Sl::Zero => {} + Sl::Exact(expr) | Sl::Bias(expr) => { + self.expressions_used.insert(expr); } + Sl::Gradient { x, y } => self.expressions_used.insert_iter([x, y]), } - Ex::Unary { op: _, expr } => { - self.expressions_used.insert(expr); - } - Ex::Binary { op: _, left, right } => { - self.expressions_used.insert_iter([left, right]); - } - Ex::Select { - condition, - accept, - reject, - } => self - .expressions_used - .insert_iter([condition, accept, reject]), - Ex::Derivative { - axis: _, - ctrl: _, - expr, - } => { - self.expressions_used.insert(expr); - } - Ex::Relational { fun: _, argument } => { - self.expressions_used.insert(argument); - } - Ex::Math { - fun: _, - arg, - arg1, - arg2, - arg3, - } => { - self.expressions_used.insert(arg); - self.expressions_used.insert_iter(arg1); - self.expressions_used.insert_iter(arg2); - self.expressions_used.insert_iter(arg3); - } - Ex::As { - expr, - kind: _, - convert: _, - } => { - self.expressions_used.insert(expr); - } - Ex::ArrayLength(expr) => { - self.expressions_used.insert(expr); - } - Ex::AtomicResult { ty, comparison: _ } - | Ex::WorkGroupUniformLoadResult { ty } - | Ex::SubgroupOperationResult { ty } => { - self.types_used.insert(ty); - } - Ex::RayQueryGetIntersection { - query, - committed: _, - } => { - self.expressions_used.insert(query); - } - Ex::RayQueryVertexPositions { - query, - committed: _, - } => { - self.expressions_used.insert(query); + self.expressions_used.insert_iter(depth_ref); + } + Ex::ImageLoad { + image, + coordinate, + array_index, + sample, + level, + } => { + self.expressions_used.insert(image); + self.expressions_used.insert(coordinate); + self.expressions_used.insert_iter(array_index); + self.expressions_used.insert_iter(sample); + self.expressions_used.insert_iter(level); + } + Ex::ImageQuery { image, ref query } => { + self.expressions_used.insert(image); + use crate::ImageQuery as Iq; + match *query { + Iq::Size { level } => self.expressions_used.insert_iter(level), + Iq::NumLevels | Iq::NumLayers | Iq::NumSamples => {} } } + Ex::RayQueryVertexPositions { + query, + committed: _, + } => { + self.expressions_used.insert(query); + } + Ex::Unary { op: _, expr } => { + self.expressions_used.insert(expr); + } + Ex::Binary { op: _, left, right } => { + self.expressions_used.insert_iter([left, right]); + } + Ex::Select { + condition, + accept, + reject, + } => self + .expressions_used + .insert_iter([condition, accept, reject]), + Ex::Derivative { + axis: _, + ctrl: _, + expr, + } => { + self.expressions_used.insert(expr); + } + Ex::Relational { fun: _, argument } => { + self.expressions_used.insert(argument); + } + Ex::Math { + fun: _, + arg, + arg1, + arg2, + arg3, + } => { + self.expressions_used.insert(arg); + self.expressions_used.insert_iter(arg1); + self.expressions_used.insert_iter(arg2); + self.expressions_used.insert_iter(arg3); + } + Ex::As { + expr, + kind: _, + convert: _, + } => { + self.expressions_used.insert(expr); + } + Ex::ArrayLength(expr) => { + self.expressions_used.insert(expr); + } + Ex::AtomicResult { ty, comparison: _ } + | Ex::WorkGroupUniformLoadResult { ty } + | Ex::SubgroupOperationResult { ty } => { + self.types_used.insert(ty); + } + Ex::RayQueryGetIntersection { + query, + committed: _, + } => { + self.expressions_used.insert(query); + } } } } @@ -259,11 +273,9 @@ impl ModuleMap { | Ex::SubgroupBallotResult | Ex::RayQueryProceedResult => {} - // All overrides are retained, so their 
handles never change. - Ex::Override(_) => {} - // Expressions that contain handles that need to be adjusted. Ex::Constant(ref mut constant) => self.constants.adjust(constant), + Ex::Override(ref mut r#override) => self.overrides.adjust(r#override), Ex::ZeroValue(ref mut ty) => self.types.adjust(ty), Ex::Compose { ref mut ty, diff --git a/naga/src/compact/functions.rs b/naga/src/compact/functions.rs index d523c1889f..b37edd7271 100644 --- a/naga/src/compact/functions.rs +++ b/naga/src/compact/functions.rs @@ -4,9 +4,11 @@ use super::{FunctionMap, ModuleMap}; pub struct FunctionTracer<'a> { pub function: &'a crate::Function, pub constants: &'a crate::Arena, + pub overrides: &'a crate::Arena, pub types_used: &'a mut HandleSet, pub constants_used: &'a mut HandleSet, + pub overrides_used: &'a mut HandleSet, pub global_expressions_used: &'a mut HandleSet, /// Function-local expressions used. @@ -47,10 +49,12 @@ impl FunctionTracer<'_> { fn as_expression(&mut self) -> super::expressions::ExpressionTracer { super::expressions::ExpressionTracer { constants: self.constants, + overrides: self.overrides, expressions: &self.function.expressions, types_used: self.types_used, constants_used: self.constants_used, + overrides_used: self.overrides_used, expressions_used: &mut self.expressions_used, global_expressions_used: Some(&mut self.global_expressions_used), } diff --git a/naga/src/compact/mod.rs b/naga/src/compact/mod.rs index 5b2d2da9a1..03063d454d 100644 --- a/naga/src/compact/mod.rs +++ b/naga/src/compact/mod.rs @@ -45,31 +45,28 @@ pub fn compact(module: &mut crate::Module) { } // We treat all special types as used by definition. + log::trace!("tracing special types"); module_tracer.trace_special_types(&module.special_types); // We treat all named constants as used by definition. + log::trace!("tracing named constants"); for (handle, constant) in module.constants.iter() { if constant.name.is_some() { + log::trace!("tracing constant {:?}", constant.name.as_ref().unwrap()); module_tracer.constants_used.insert(handle); module_tracer.global_expressions_used.insert(constant.init); } } - // We treat all overrides as used by definition. - for (_, override_) in module.overrides.iter() { - module_tracer.types_used.insert(override_.ty); - if let Some(init) = override_.init { - module_tracer.global_expressions_used.insert(init); - } - } - - for (_, ty) in module.types.iter() { - if let crate::TypeInner::Array { - size: crate::ArraySize::Pending(crate::PendingArraySize::Expression(size_expr)), - .. - } = ty.inner - { - module_tracer.global_expressions_used.insert(size_expr); + // We treat all named overrides as used by definition. + log::trace!("tracing named overrides"); + for (handle, r#override) in module.overrides.iter() { + if r#override.name.is_some() { + log::trace!("tracing override {:?}", r#override.name.as_ref().unwrap()); + module_tracer.overrides_used.insert(handle); + if let Some(init) = r#override.init { + module_tracer.global_expressions_used.insert(init); + } } } @@ -111,12 +108,6 @@ pub fn compact(module: &mut crate::Module) { }) .collect(); - // Given that the above steps have marked all the constant - // expressions used directly by globals, constants, functions, and - // entry points, walk the constant expression arena to find all - // constant expressions used, directly or indirectly. - module_tracer.as_const_expression().trace_expressions(); - // Constants' initializers are taken care of already, because // expression tracing sees through constants. 
But we still need to // note type usage. @@ -126,6 +117,15 @@ pub fn compact(module: &mut crate::Module) { } } + // Overrides' initializers are taken care of already, because + // expression tracing sees through overrides. But we still need to + // note type usage. + for (handle, r#override) in module.overrides.iter() { + if module_tracer.overrides_used.contains(handle) { + module_tracer.types_used.insert(r#override.ty); + } + } + // Treat all named types as used. for (handle, ty) in module.types.iter() { log::trace!("tracing type {:?}, name {:?}", handle, ty.name); @@ -134,8 +134,7 @@ pub fn compact(module: &mut crate::Module) { } } - // Propagate usage through types. - module_tracer.as_type().trace_types(); + module_tracer.type_expression_tandem(); // Now that we know what is used and what is never touched, // produce maps from the `Handle`s that appear in `module` now to @@ -184,14 +183,19 @@ pub fn compact(module: &mut crate::Module) { } }); - // Adjust override types and initializers. + // Drop unused overrides in place, reusing existing storage. log::trace!("adjusting overrides"); - for (_, override_) in module.overrides.iter_mut() { - module_map.types.adjust(&mut override_.ty); - if let Some(init) = override_.init.as_mut() { - module_map.global_expressions.adjust(init); + module.overrides.retain_mut(|handle, r#override| { + if module_map.overrides.used(handle) { + module_map.types.adjust(&mut r#override.ty); + if let Some(ref mut init) = r#override.init { + module_map.global_expressions.adjust(init); + } + true + } else { + false } - } + }); // Adjust workgroup_size_overrides log::trace!("adjusting workgroup_size_overrides"); @@ -240,6 +244,7 @@ struct ModuleTracer<'module> { module: &'module crate::Module, types_used: HandleSet, constants_used: HandleSet, + overrides_used: HandleSet, global_expressions_used: HandleSet, } @@ -249,6 +254,7 @@ impl<'module> ModuleTracer<'module> { module, types_used: HandleSet::for_arena(&module.types), constants_used: HandleSet::for_arena(&module.constants), + overrides_used: HandleSet::for_arena(&module.overrides), global_expressions_used: HandleSet::for_arena(&module.global_expressions), } } @@ -275,20 +281,93 @@ impl<'module> ModuleTracer<'module> { } } + /// Traverse types and global expressions in tandem to determine which are used. + /// + /// Assuming that all types and global expressions used by other parts of + /// the module have been added to [`types_used`] and + /// [`global_expressions_used`], expand those sets to include all types and + /// global expressions reachable from those. + /// + /// [`types_used`]: ModuleTracer::types_used + /// [`global_expressions_used`]: ModuleTracer::global_expressions_used + fn type_expression_tandem(&mut self) { + // For each type T, compute the latest global expression E that T and + // its predecessors refer to. Given the ordering rules on types and + // global expressions in valid modules, we can do this with a single + // forward scan of the type arena. The rules further imply that T can + // only be referred to by expressions after E. + let mut max_dep = Vec::with_capacity(self.module.types.len()); + let mut previous = None; + for (_handle, ty) in self.module.types.iter() { + previous = std::cmp::max( + previous, + match ty.inner { + crate::TypeInner::Array { size, .. } + | crate::TypeInner::BindingArray { size, .. 
} => match size { + crate::ArraySize::Constant(_) | crate::ArraySize::Dynamic => None, + crate::ArraySize::Pending(pending) => match pending { + crate::PendingArraySize::Expression(handle) => Some(handle), + crate::PendingArraySize::Override(handle) => { + self.module.overrides[handle].init + } + }, + }, + _ => None, + }, + ); + max_dep.push(previous); + } + + // Visit types and global expressions from youngest to oldest. + // + // The outer loop visits types. Before visiting each type, the inner + // loop ensures that all global expressions that could possibly refer to + // it have been visited. And since the inner loop stop at the latest + // expression that the type could possibly refer to, we know that we + // have previously visited any types that might refer to each expression + // we visit. + // + // This lets us assume that any type or expression that is *not* marked + // as used by the time we visit it is genuinely unused, and can be + // ignored. + let mut exprs = self.module.global_expressions.iter().rev().peekable(); + + for ((ty_handle, ty), dep) in self.module.types.iter().zip(max_dep).rev() { + while let Some((expr_handle, expr)) = exprs.next_if(|&(h, _)| Some(h) > dep) { + if self.global_expressions_used.contains(expr_handle) { + self.as_const_expression().trace_expression(expr); + } + } + if self.types_used.contains(ty_handle) { + self.as_type().trace_type(ty); + } + } + // Visit any remaining expressions. + for (expr_handle, expr) in exprs { + if self.global_expressions_used.contains(expr_handle) { + self.as_const_expression().trace_expression(expr); + } + } + } + fn as_type(&mut self) -> types::TypeTracer { types::TypeTracer { - types: &self.module.types, + overrides: &self.module.overrides, types_used: &mut self.types_used, + expressions_used: &mut self.global_expressions_used, + overrides_used: &mut self.overrides_used, } } fn as_const_expression(&mut self) -> expressions::ExpressionTracer { expressions::ExpressionTracer { - expressions: &self.module.global_expressions, constants: &self.module.constants, + overrides: &self.module.overrides, + expressions: &self.module.global_expressions, types_used: &mut self.types_used, constants_used: &mut self.constants_used, expressions_used: &mut self.global_expressions_used, + overrides_used: &mut self.overrides_used, global_expressions_used: None, } } @@ -300,8 +379,10 @@ impl<'module> ModuleTracer<'module> { FunctionTracer { function, constants: &self.module.constants, + overrides: &self.module.overrides, types_used: &mut self.types_used, constants_used: &mut self.constants_used, + overrides_used: &mut self.overrides_used, global_expressions_used: &mut self.global_expressions_used, expressions_used: HandleSet::for_arena(&function.expressions), } @@ -311,6 +392,7 @@ impl<'module> ModuleTracer<'module> { struct ModuleMap { types: HandleMap, constants: HandleMap, + overrides: HandleMap, global_expressions: HandleMap, } @@ -319,6 +401,7 @@ impl From> for ModuleMap { ModuleMap { types: HandleMap::from_set(used.types_used), constants: HandleMap::from_set(used.constants_used), + overrides: HandleMap::from_set(used.overrides_used), global_expressions: HandleMap::from_set(used.global_expressions_used), } } @@ -361,3 +444,448 @@ impl From> for FunctionMap { } } } + +#[test] +fn type_expression_interdependence() { + let mut module: crate::Module = Default::default(); + let u32 = module.types.insert( + crate::Type { + name: None, + inner: crate::TypeInner::Scalar(crate::Scalar { + kind: crate::ScalarKind::Uint, + width: 4, + }), + }, + 
crate::Span::default(), + ); + let expr = module.global_expressions.append( + crate::Expression::Literal(crate::Literal::U32(0)), + crate::Span::default(), + ); + let type_needs_expression = |module: &mut crate::Module, handle| { + module.types.insert( + crate::Type { + name: None, + inner: crate::TypeInner::Array { + base: u32, + size: crate::ArraySize::Pending(crate::PendingArraySize::Expression(handle)), + stride: 4, + }, + }, + crate::Span::default(), + ) + }; + let expression_needs_type = |module: &mut crate::Module, handle| { + module + .global_expressions + .append(crate::Expression::ZeroValue(handle), crate::Span::default()) + }; + let expression_needs_expression = |module: &mut crate::Module, handle| { + module.global_expressions.append( + crate::Expression::Load { pointer: handle }, + crate::Span::default(), + ) + }; + let type_needs_type = |module: &mut crate::Module, handle| { + module.types.insert( + crate::Type { + name: None, + inner: crate::TypeInner::Array { + base: handle, + size: crate::ArraySize::Dynamic, + stride: 0, + }, + }, + crate::Span::default(), + ) + }; + let mut type_name_counter = 0; + let mut type_needed = |module: &mut crate::Module, handle| { + let name = Some(format!("type{}", type_name_counter)); + type_name_counter += 1; + module.types.insert( + crate::Type { + name, + inner: crate::TypeInner::Array { + base: handle, + size: crate::ArraySize::Dynamic, + stride: 0, + }, + }, + crate::Span::default(), + ) + }; + let mut override_name_counter = 0; + let mut expression_needed = |module: &mut crate::Module, handle| { + let name = Some(format!("override{}", override_name_counter)); + override_name_counter += 1; + module.overrides.append( + crate::Override { + name, + id: None, + ty: u32, + init: Some(handle), + }, + crate::Span::default(), + ) + }; + let cmp_modules = |mod0: &crate::Module, mod1: &crate::Module| { + (mod0.types.iter().collect::>() == mod1.types.iter().collect::>()) + && (mod0.global_expressions.iter().collect::>() + == mod1.global_expressions.iter().collect::>()) + }; + // borrow checker breaks without the tmp variables as of Rust 1.83.0 + let expr_end = type_needs_expression(&mut module, expr); + let ty_trace = type_needs_type(&mut module, expr_end); + let expr_init = expression_needs_type(&mut module, ty_trace); + expression_needed(&mut module, expr_init); + let ty_end = expression_needs_type(&mut module, u32); + let expr_trace = expression_needs_expression(&mut module, ty_end); + let ty_init = type_needs_expression(&mut module, expr_trace); + type_needed(&mut module, ty_init); + let untouched = module.clone(); + compact(&mut module); + assert!(cmp_modules(&module, &untouched)); + let unused_expr = module.global_expressions.append( + crate::Expression::Literal(crate::Literal::U32(1)), + crate::Span::default(), + ); + type_needs_expression(&mut module, unused_expr); + assert!(!cmp_modules(&module, &untouched)); + compact(&mut module); + assert!(cmp_modules(&module, &untouched)); +} + +#[test] +fn array_length_override() { + let mut module: crate::Module = Default::default(); + let ty_bool = module.types.insert( + crate::Type { + name: None, + inner: crate::TypeInner::Scalar(crate::Scalar::BOOL), + }, + crate::Span::default(), + ); + let ty_u32 = module.types.insert( + crate::Type { + name: None, + inner: crate::TypeInner::Scalar(crate::Scalar::U32), + }, + crate::Span::default(), + ); + let one = module.global_expressions.append( + crate::Expression::Literal(crate::Literal::U32(1)), + crate::Span::default(), + ); + let _unused_override = 
module.overrides.append( + crate::Override { + name: None, + id: Some(40), + ty: ty_u32, + init: None, + }, + crate::Span::default(), + ); + let o = module.overrides.append( + crate::Override { + name: None, + id: Some(42), + ty: ty_u32, + init: Some(one), + }, + crate::Span::default(), + ); + let _ty_array = module.types.insert( + crate::Type { + name: Some("array".to_string()), + inner: crate::TypeInner::Array { + base: ty_bool, + size: crate::ArraySize::Pending(crate::PendingArraySize::Override(o)), + stride: 4, + }, + }, + crate::Span::default(), + ); + + let mut validator = super::valid::Validator::new( + super::valid::ValidationFlags::all(), + super::valid::Capabilities::all(), + ); + + assert!(validator.validate(&module).is_ok()); + compact(&mut module); + assert!(validator.validate(&module).is_ok()); +} + +/// Test mutual references between types and expressions via override +/// lengths. +#[test] +fn array_length_override_mutual() { + use crate::Expression as Ex; + use crate::Scalar as Sc; + use crate::TypeInner as Ti; + + let nowhere = crate::Span::default(); + let mut module = crate::Module::default(); + let ty_u32 = module.types.insert( + crate::Type { + name: None, + inner: Ti::Scalar(Sc::U32), + }, + nowhere, + ); + + // This type is only referred to by the override's init + // expression, so if we visit that too early, this type will be + // removed incorrectly. + let ty_i32 = module.types.insert( + crate::Type { + name: None, + inner: Ti::Scalar(Sc::I32), + }, + nowhere, + ); + + // An override that the other override's init can refer to. + let first_override = module.overrides.append( + crate::Override { + name: None, // so it is not considered used by definition + id: Some(41), + ty: ty_i32, + init: None, + }, + nowhere, + ); + + // Initializer expression for the override: + // + // (first_override + 0) as u32 + // + // The `first_override` makes it an override expression; the `0` + // gets a use of `ty_i32` in there; and the `as` makes it match + // the type of `second_override` without actually making + // `second_override` point at `ty_i32` directly. + let first_override_expr = module + .global_expressions + .append(Ex::Override(first_override), nowhere); + let zero = module + .global_expressions + .append(Ex::ZeroValue(ty_i32), nowhere); + let sum = module.global_expressions.append( + Ex::Binary { + op: crate::BinaryOperator::Add, + left: first_override_expr, + right: zero, + }, + nowhere, + ); + let init = module.global_expressions.append( + Ex::As { + expr: sum, + kind: crate::ScalarKind::Uint, + convert: None, + }, + nowhere, + ); + + // Override that serves as the array's length. + let second_override = module.overrides.append( + crate::Override { + name: None, // so it is not considered used by definition + id: Some(42), + ty: ty_u32, + init: Some(init), + }, + nowhere, + ); + + // Array type that uses the overload as its length. + // Since this is named, it is considered used by definition. 
+ let _ty_array = module.types.insert( + crate::Type { + name: Some("delicious_array".to_string()), + inner: Ti::Array { + base: ty_u32, + size: crate::ArraySize::Pending(crate::PendingArraySize::Override(second_override)), + stride: 4, + }, + }, + nowhere, + ); + + let mut validator = super::valid::Validator::new( + super::valid::ValidationFlags::all(), + super::valid::Capabilities::all(), + ); + + assert!(validator.validate(&module).is_ok()); + compact(&mut module); + assert!(validator.validate(&module).is_ok()); +} + +#[test] +fn array_length_expression() { + let mut module: crate::Module = Default::default(); + let ty_u32 = module.types.insert( + crate::Type { + name: None, + inner: crate::TypeInner::Scalar(crate::Scalar::U32), + }, + crate::Span::default(), + ); + let _unused_zero = module.global_expressions.append( + crate::Expression::Literal(crate::Literal::U32(0)), + crate::Span::default(), + ); + let one = module.global_expressions.append( + crate::Expression::Literal(crate::Literal::U32(1)), + crate::Span::default(), + ); + let _ty_array = module.types.insert( + crate::Type { + name: Some("array".to_string()), + inner: crate::TypeInner::Array { + base: ty_u32, + size: crate::ArraySize::Pending(crate::PendingArraySize::Expression(one)), + stride: 4, + }, + }, + crate::Span::default(), + ); + + let mut validator = super::valid::Validator::new( + super::valid::ValidationFlags::all(), + super::valid::Capabilities::all(), + ); + + assert!(validator.validate(&module).is_ok()); + compact(&mut module); + assert!(validator.validate(&module).is_ok()); +} + +#[test] +fn global_expression_override() { + let mut module: crate::Module = Default::default(); + let ty_u32 = module.types.insert( + crate::Type { + name: None, + inner: crate::TypeInner::Scalar(crate::Scalar::U32), + }, + crate::Span::default(), + ); + + // This will only be retained if we trace the initializers + // of overrides referred to by `Expression::Override` + // in global expressions. + let expr1 = module.global_expressions.append( + crate::Expression::Literal(crate::Literal::U32(1)), + crate::Span::default(), + ); + + // This will only be traced via a global `Expression::Override`. + let o = module.overrides.append( + crate::Override { + name: None, + id: Some(42), + ty: ty_u32, + init: Some(expr1), + }, + crate::Span::default(), + ); + + // This is retained by _p. + let expr2 = module + .global_expressions + .append(crate::Expression::Override(o), crate::Span::default()); + + // Since this is named, it will be retained. + let _p = module.overrides.append( + crate::Override { + name: Some("p".to_string()), + id: None, + ty: ty_u32, + init: Some(expr2), + }, + crate::Span::default(), + ); + + let mut validator = super::valid::Validator::new( + super::valid::ValidationFlags::all(), + super::valid::Capabilities::all(), + ); + + assert!(validator.validate(&module).is_ok()); + compact(&mut module); + assert!(validator.validate(&module).is_ok()); +} + +#[test] +fn local_expression_override() { + let mut module: crate::Module = Default::default(); + let ty_u32 = module.types.insert( + crate::Type { + name: None, + inner: crate::TypeInner::Scalar(crate::Scalar::U32), + }, + crate::Span::default(), + ); + + // This will only be retained if we trace the initializers + // of overrides referred to by `Expression::Override` in a function. + let expr1 = module.global_expressions.append( + crate::Expression::Literal(crate::Literal::U32(1)), + crate::Span::default(), + ); + + // This will be removed by compaction. 
+ let _unused_override = module.overrides.append( + crate::Override { + name: None, + id: Some(41), + ty: ty_u32, + init: None, + }, + crate::Span::default(), + ); + + // This will only be traced via an `Expression::Override` in a function. + let o = module.overrides.append( + crate::Override { + name: None, + id: Some(42), + ty: ty_u32, + init: Some(expr1), + }, + crate::Span::default(), + ); + + let mut fun = crate::Function { + result: Some(crate::FunctionResult { + ty: ty_u32, + binding: None, + }), + ..crate::Function::default() + }; + + // This is used by the `Return` statement. + let o_expr = fun + .expressions + .append(crate::Expression::Override(o), crate::Span::default()); + fun.body.push( + crate::Statement::Return { + value: Some(o_expr), + }, + crate::Span::default(), + ); + + module.functions.append(fun, crate::Span::default()); + + let mut validator = super::valid::Validator::new( + super::valid::ValidationFlags::all(), + super::valid::Capabilities::all(), + ); + + assert!(validator.validate(&module).is_ok()); + compact(&mut module); + assert!(validator.validate(&module).is_ok()); +} diff --git a/naga/src/compact/types.rs b/naga/src/compact/types.rs index 44ee67fdb8..2932568268 100644 --- a/naga/src/compact/types.rs +++ b/naga/src/compact/types.rs @@ -1,59 +1,62 @@ use super::{HandleSet, ModuleMap}; -use crate::{Handle, UniqueArena}; +use crate::Handle; pub struct TypeTracer<'a> { - pub types: &'a UniqueArena, + pub overrides: &'a crate::Arena, pub types_used: &'a mut HandleSet, + pub expressions_used: &'a mut HandleSet, + pub overrides_used: &'a mut HandleSet, } impl TypeTracer<'_> { - /// Propagate usage through `self.types`, starting with `self.types_used`. - /// - /// Treat `self.types_used` as the initial set of "known - /// live" types, and follow through to identify all - /// transitively used types. - pub fn trace_types(&mut self) { - // We don't need recursion or a work list. Because an - // expression may only refer to other expressions that precede - // it in the arena, it suffices to make a single pass over the - // arena from back to front, marking the referents of used - // expressions as used themselves. - for (handle, ty) in self.types.iter().rev() { - // If this type isn't used, it doesn't matter what it uses. - if !self.types_used.contains(handle) { - continue; - } - - use crate::TypeInner as Ti; - match ty.inner { - // Types that do not contain handles. - Ti::Scalar { .. } - | Ti::Vector { .. } - | Ti::Matrix { .. } - | Ti::Atomic { .. } - | Ti::ValuePointer { .. } - | Ti::Image { .. } - | Ti::Sampler { .. } - | Ti::AccelerationStructure { .. } - | Ti::RayQuery { .. } => {} + pub fn trace_type(&mut self, ty: &crate::Type) { + use crate::TypeInner as Ti; + match ty.inner { + // Types that do not contain handles. + Ti::Scalar { .. } + | Ti::Vector { .. } + | Ti::Matrix { .. } + | Ti::Atomic { .. } + | Ti::ValuePointer { .. } + | Ti::Image { .. } + | Ti::Sampler { .. } + | Ti::AccelerationStructure { .. } + | Ti::RayQuery { .. } => {} - // Types that do contain handles. - Ti::Pointer { base, space: _ } - | Ti::Array { - base, - size: _, - stride: _, - } - | Ti::BindingArray { base, size: _ } => { - self.types_used.insert(base); - } - Ti::Struct { - ref members, - span: _, - } => { - self.types_used.insert_iter(members.iter().map(|m| m.ty)); + // Types that do contain handles. 
+ Ti::Array { + base, + size, + stride: _, + } + | Ti::BindingArray { base, size } => { + self.types_used.insert(base); + match size { + crate::ArraySize::Pending(pending) => match pending { + crate::PendingArraySize::Expression(expr) => { + self.expressions_used.insert(expr); + } + crate::PendingArraySize::Override(handle) => { + self.overrides_used.insert(handle); + let r#override = &self.overrides[handle]; + self.types_used.insert(r#override.ty); + if let Some(expr) = r#override.init { + self.expressions_used.insert(expr); + } + } + }, + crate::ArraySize::Constant(_) | crate::ArraySize::Dynamic => {} } } + Ti::Pointer { base, space: _ } => { + self.types_used.insert(base); + } + Ti::Struct { + ref members, + span: _, + } => { + self.types_used.insert_iter(members.iter().map(|m| m.ty)); + } } } } @@ -84,13 +87,24 @@ impl ModuleMap { ref mut base, ref mut size, stride: _, + } + | Ti::BindingArray { + ref mut base, + ref mut size, } => { adjust(base); - if let crate::ArraySize::Pending(crate::PendingArraySize::Expression( - ref mut size_expr, - )) = *size - { - self.global_expressions.adjust(size_expr); + match *size { + crate::ArraySize::Pending(crate::PendingArraySize::Expression( + ref mut size_expr, + )) => { + self.global_expressions.adjust(size_expr); + } + crate::ArraySize::Pending(crate::PendingArraySize::Override( + ref mut r#override, + )) => { + self.overrides.adjust(r#override); + } + crate::ArraySize::Constant(_) | crate::ArraySize::Dynamic => {} } } Ti::Struct { @@ -101,12 +115,6 @@ impl ModuleMap { self.types.adjust(&mut member.ty); } } - Ti::BindingArray { - ref mut base, - size: _, - } => { - adjust(base); - } }; } } diff --git a/naga/src/front/spv/image.rs b/naga/src/front/spv/image.rs index 22c1701ed8..ace97d4dd5 100644 --- a/naga/src/front/spv/image.rs +++ b/naga/src/front/spv/image.rs @@ -311,12 +311,51 @@ impl> super::Frontend { let value_lexp = self.lookup_expression.lookup(value_id)?; let value = self.get_expr_handle(value_id, value_lexp, ctx, emitter, block, body_idx); + let value_type = self.lookup_type.lookup(value_lexp.type_id)?.handle; + + // In hlsl etc, the write value may not be the vector 4. + let expanded_value = match ctx.module.types[value_type].inner { + crate::TypeInner::Scalar(_) => Some(crate::Expression::Splat { + value, + size: crate::VectorSize::Quad, + }), + crate::TypeInner::Vector { size, .. 
} => match size { + crate::VectorSize::Bi => Some(crate::Expression::Swizzle { + size: crate::VectorSize::Quad, + vector: value, + pattern: [ + crate::SwizzleComponent::X, + crate::SwizzleComponent::Y, + crate::SwizzleComponent::Y, + crate::SwizzleComponent::Y, + ], + }), + crate::VectorSize::Tri => Some(crate::Expression::Swizzle { + size: crate::VectorSize::Quad, + vector: value, + pattern: [ + crate::SwizzleComponent::X, + crate::SwizzleComponent::Y, + crate::SwizzleComponent::Z, + crate::SwizzleComponent::Z, + ], + }), + crate::VectorSize::Quad => None, + }, + _ => return Err(Error::InvalidVectorType(value_type)), + }; + + let value_patched = if let Some(s) = expanded_value { + ctx.expressions.append(s, crate::Span::default()) + } else { + value + }; Ok(crate::Statement::ImageStore { image: image_lexp.handle, coordinate, array_index, - value, + value: value_patched, }) } diff --git a/naga/src/front/type_gen.rs b/naga/src/front/type_gen.rs index c35d3c455b..737ef26697 100644 --- a/naga/src/front/type_gen.rs +++ b/naga/src/front/type_gen.rs @@ -210,13 +210,13 @@ impl crate::Module { offset: 4, }, crate::StructMember { - name: Some("instance_custom_index".to_string()), + name: Some("instance_custom_data".to_string()), ty: ty_flag, binding: None, offset: 8, }, crate::StructMember { - name: Some("instance_id".to_string()), + name: Some("instance_index".to_string()), ty: ty_flag, binding: None, offset: 12, diff --git a/naga/src/front/wgsl/error.rs b/naga/src/front/wgsl/error.rs index 44ac7885cc..7bdbf12d2c 100644 --- a/naga/src/front/wgsl/error.rs +++ b/naga/src/front/wgsl/error.rs @@ -213,7 +213,6 @@ pub(crate) enum Error<'a> { InvalidAtomicPointer(Span), InvalidAtomicOperandType(Span), InvalidRayQueryPointer(Span), - Pointer(&'static str, Span), NotPointer(Span), NotReference(&'static str, Span), InvalidAssignment { @@ -262,6 +261,8 @@ pub(crate) enum Error<'a> { found: u32, }, FunctionReturnsVoid(Span), + FunctionMustUseUnused(Span), + FunctionMustUseReturnsVoid(Span, Span), InvalidWorkGroupUniformLoad(Span), Internal(&'static str), ExpectedConstExprConcreteIntegerScalar(Span), @@ -717,11 +718,6 @@ impl<'a> Error<'a> { notes, } } - Error::Pointer(what, span) => ParseError { - message: format!("{what} must not be a pointer"), - labels: vec![(span, "expression is a pointer".into())], - notes: vec![], - }, Error::ReservedKeyword(name_span) => ParseError { message: format!("name `{}` is a reserved keyword", &source[name_span]), labels: vec![( @@ -820,6 +816,27 @@ impl<'a> Error<'a> { "perhaps you meant to call the function in a separate statement?".into(), ], }, + Error::FunctionMustUseUnused(call) => ParseError { + message: "unused return value from function annotated with @must_use".into(), + labels: vec![(call, "".into())], + notes: vec![ + format!( + "function '{}' is declared with `@must_use` attribute", + &source[call], + ), + "use a phony assignment or declare a value using the function call as the initializer".into(), + ], + }, + Error::FunctionMustUseReturnsVoid(attr, signature) => ParseError { + message: "function annotated with @must_use but does not return any value".into(), + labels: vec![ + (attr, "".into()), + (signature, "".into()), + ], + notes: vec![ + "declare a return type or remove the attribute".into(), + ], + }, Error::InvalidWorkGroupUniformLoad(span) => ParseError { message: "incorrect type passed to workgroupUniformLoad".into(), labels: vec![(span, "".into())], diff --git a/naga/src/front/wgsl/lower/mod.rs b/naga/src/front/wgsl/lower/mod.rs index 
fa59f0b41d..65e4032216 100644 --- a/naga/src/front/wgsl/lower/mod.rs +++ b/naga/src/front/wgsl/lower/mod.rs @@ -372,6 +372,7 @@ pub struct ExpressionContext<'source, 'temp, 'out> { } impl<'source, 'temp, 'out> ExpressionContext<'source, 'temp, 'out> { + #[allow(dead_code)] fn as_const(&mut self) -> ExpressionContext<'source, '_, '_> { ExpressionContext { globals: self.globals, @@ -379,7 +380,21 @@ impl<'source, 'temp, 'out> ExpressionContext<'source, 'temp, 'out> { ast_expressions: self.ast_expressions, const_typifier: self.const_typifier, module: self.module, - expr_type: ExpressionContextType::Constant(None), + expr_type: ExpressionContextType::Constant(match self.expr_type { + ExpressionContextType::Runtime(ref mut local_expression_context) + | ExpressionContextType::Constant(Some(ref mut local_expression_context)) => { + Some(LocalExpressionContext { + local_table: local_expression_context.local_table, + function: local_expression_context.function, + block: local_expression_context.block, + emitter: local_expression_context.emitter, + typifier: local_expression_context.typifier, + local_expression_kind_tracker: local_expression_context + .local_expression_kind_tracker, + }) + } + ExpressionContextType::Constant(None) | ExpressionContextType::Override => None, + }), global_expression_kind_tracker: self.global_expression_kind_tracker, } } @@ -919,7 +934,10 @@ impl Components { /// An `ast::GlobalDecl` for which we have built the Naga IR equivalent. enum LoweredGlobalDecl { - Function(Handle), + Function { + handle: Handle, + must_use: bool, + }, Var(Handle), Const(Handle), Override(Handle), @@ -1350,7 +1368,10 @@ impl<'source, 'temp> Lowerer<'source, 'temp> { Ok(LoweredGlobalDecl::EntryPoint) } else { let handle = ctx.module.functions.append(function, span); - Ok(LoweredGlobalDecl::Function(handle)) + Ok(LoweredGlobalDecl::Function { + handle, + must_use: f.result.as_ref().is_some_and(|res| res.must_use), + }) } } @@ -1651,13 +1672,28 @@ impl<'source, 'temp> Lowerer<'source, 'temp> { } ast::StatementKind::Break => crate::Statement::Break, ast::StatementKind::Continue => crate::Statement::Continue, - ast::StatementKind::Return { value } => { + ast::StatementKind::Return { value: ast_value } => { let mut emitter = Emitter::default(); emitter.start(&ctx.function.expressions); - let value = value - .map(|expr| self.expression(expr, &mut ctx.as_expression(block, &mut emitter))) - .transpose()?; + let value; + if let Some(ast_expr) = ast_value { + let result_ty = ctx.function.result.as_ref().map(|r| r.ty); + let mut ectx = ctx.as_expression(block, &mut emitter); + let expr = self.expression_for_abstract(ast_expr, &mut ectx)?; + + if let Some(result_ty) = result_ty { + let mut ectx = ctx.as_expression(block, &mut emitter); + let resolution = crate::proc::TypeResolution::Handle(result_ty); + let converted = + ectx.try_automatic_conversions(expr, &resolution, Span::default())?; + value = Some(converted); + } else { + value = Some(expr); + } + } else { + value = None; + } block.extend(emitter.finish(&ctx.function.expressions)); crate::Statement::Return { value } @@ -1919,7 +1955,7 @@ impl<'source, 'temp> Lowerer<'source, 'temp> { LoweredGlobalDecl::Override(handle) => { Typed::Plain(crate::Expression::Override(handle)) } - LoweredGlobalDecl::Function(_) + LoweredGlobalDecl::Function { .. 
} | LoweredGlobalDecl::Type(_) | LoweredGlobalDecl::EntryPoint => { return Err(Error::Unexpected(span, ExpectedToken::Variable)); @@ -1977,15 +2013,14 @@ impl<'source, 'temp> Lowerer<'source, 'temp> { return Ok(Typed::Plain(handle)); } ast::Expression::Index { base, index } => { - let lowered_base = self.expression_for_reference(base, ctx)?; + let mut lowered_base = self.expression_for_reference(base, ctx)?; let index = self.expression(index, ctx)?; + // + // Declare pointer as reference if let Typed::Plain(handle) = lowered_base { if resolve_inner!(ctx, handle).pointer_space().is_some() { - return Err(Error::Pointer( - "the value indexed by a `[]` subscripting expression", - ctx.ast_expressions.get_span(base), - )); + lowered_base = Typed::Reference(handle); } } @@ -1995,7 +2030,15 @@ impl<'source, 'temp> Lowerer<'source, 'temp> { }) } ast::Expression::Member { base, ref field } => { - let lowered_base = self.expression_for_reference(base, ctx)?; + let mut lowered_base = self.expression_for_reference(base, ctx)?; + + // + // Declare pointer as reference + if let Typed::Plain(handle) = lowered_base { + if resolve_inner!(ctx, handle).pointer_space().is_some() { + lowered_base = Typed::Reference(handle); + } + } let temp_inner; let composite_type: &crate::TypeInner = match lowered_base { @@ -2024,16 +2067,7 @@ impl<'source, 'temp> Lowerer<'source, 'temp> { } Typed::Plain(handle) => { - let inner = resolve_inner!(ctx, handle); - if let crate::TypeInner::Pointer { .. } - | crate::TypeInner::ValuePointer { .. } = *inner - { - return Err(Error::Pointer( - "the value accessed by a `.member` expression", - ctx.ast_expressions.get_span(base), - )); - } - inner + resolve_inner!(ctx, handle) } }; @@ -2166,12 +2200,13 @@ impl<'source, 'temp> Lowerer<'source, 'temp> { ctx: &mut ExpressionContext<'source, '_, '_>, is_statement: bool, ) -> Result>, Error<'source>> { + let function_span = function.span; match ctx.globals.get(function.name) { Some(&LoweredGlobalDecl::Type(ty)) => { let handle = self.construct( span, &ast::ConstructorType::Type(ty), - function.span, + function_span, arguments, ctx, )?; @@ -2181,9 +2216,12 @@ impl<'source, 'temp> Lowerer<'source, 'temp> { &LoweredGlobalDecl::Const(_) | &LoweredGlobalDecl::Override(_) | &LoweredGlobalDecl::Var(_), - ) => Err(Error::Unexpected(function.span, ExpectedToken::Function)), - Some(&LoweredGlobalDecl::EntryPoint) => Err(Error::CalledEntryPoint(function.span)), - Some(&LoweredGlobalDecl::Function(function)) => { + ) => Err(Error::Unexpected(function_span, ExpectedToken::Function)), + Some(&LoweredGlobalDecl::EntryPoint) => Err(Error::CalledEntryPoint(function_span)), + Some(&LoweredGlobalDecl::Function { + handle: function, + must_use, + }) => { let arguments = arguments .iter() .enumerate() @@ -2208,6 +2246,11 @@ impl<'source, 'temp> Lowerer<'source, 'temp> { .collect::, _>>()?; let has_result = ctx.module.functions[function].result.is_some(); + + if must_use && is_statement { + return Err(Error::FunctionMustUseUnused(function_span)); + } + let rctx = ctx.runtime_expression_ctx(span)?; // we need to always do this before a fn call since all arguments need to be emitted before the fn call rctx.block @@ -2234,7 +2277,7 @@ impl<'source, 'temp> Lowerer<'source, 'temp> { Ok(result) } None => { - let span = function.span; + let span = function_span; let expr = if let Some(fun) = conv::map_relational_fun(function.name) { let mut args = ctx.prepare_args(arguments, 1, span); let argument = self.expression(args.next()?, ctx)?; @@ -2915,7 +2958,7 @@ 
impl<'source, 'temp> Lowerer<'source, 'temp> { let offset = args .next() - .map(|arg| self.expression(arg, &mut ctx.as_const())) + .map(|arg| self.expression(arg, &mut ctx.as_global().as_const())) .ok() .transpose()?; diff --git a/naga/src/front/wgsl/parse/ast.rs b/naga/src/front/wgsl/parse/ast.rs index a95c89f3f2..0912d9d822 100644 --- a/naga/src/front/wgsl/parse/ast.rs +++ b/naga/src/front/wgsl/parse/ast.rs @@ -114,6 +114,7 @@ pub struct FunctionArgument<'a> { pub struct FunctionResult<'a> { pub ty: Handle>, pub binding: Option>, + pub must_use: bool, } #[derive(Debug)] diff --git a/naga/src/front/wgsl/parse/directive/language_extension.rs b/naga/src/front/wgsl/parse/directive/language_extension.rs index 92980b5563..4a48ccd919 100644 --- a/naga/src/front/wgsl/parse/directive/language_extension.rs +++ b/naga/src/front/wgsl/parse/directive/language_extension.rs @@ -34,7 +34,7 @@ impl LanguageExtension { Self::Unimplemented(UnimplementedLanguageExtension::UnrestrictedPointerParameters) } Self::POINTER_COMPOSITE_ACCESS => { - Self::Unimplemented(UnimplementedLanguageExtension::PointerCompositeAccess) + Self::Implemented(ImplementedLanguageExtension::PointerCompositeAccess) } _ => return None, }) @@ -54,9 +54,6 @@ impl LanguageExtension { UnimplementedLanguageExtension::UnrestrictedPointerParameters => { Self::UNRESTRICTED_POINTER_PARAMETERS } - UnimplementedLanguageExtension::PointerCompositeAccess => { - Self::POINTER_COMPOSITE_ACCESS - } }, } } @@ -64,7 +61,9 @@ impl LanguageExtension { /// A variant of [`LanguageExtension::Implemented`]. #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq, VariantArray)] -pub enum ImplementedLanguageExtension {} +pub enum ImplementedLanguageExtension { + PointerCompositeAccess, +} impl ImplementedLanguageExtension { /// Returns slice of all variants of [`ImplementedLanguageExtension`]. @@ -74,7 +73,11 @@ impl ImplementedLanguageExtension { /// Maps this [`ImplementedLanguageExtension`] into the sentinel word associated with it in WGSL. 
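Marking `pointer_composite_access` as implemented corresponds to WGSL like the following sketch (illustrative; the patch's own coverage is the `index_ptr`/`member_ptr` tests added to access.wgsl further down):

    struct Light { intensity: f32 }

    fn read_member_via_pointer() -> f32 {
        var light = Light(1.0);
        let p = &light;
        // With pointer_composite_access, `.member` and `[index]` apply directly
        // to a pointer; previously this required an explicit dereference, (*p).intensity.
        return p.intensity;
    }

    fn read_element_via_pointer() -> f32 {
        var values = array<f32, 2>(1.0, 2.0);
        let q = &values;
        return q[0];
    }
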
pub const fn to_ident(self) -> &'static str { - match self {} + match self { + ImplementedLanguageExtension::PointerCompositeAccess => { + LanguageExtension::POINTER_COMPOSITE_ACCESS + } + } } } @@ -84,7 +87,6 @@ pub enum UnimplementedLanguageExtension { ReadOnlyAndReadWriteStorageTextures, Packed4x8IntegerDotProduct, UnrestrictedPointerParameters, - PointerCompositeAccess, } impl UnimplementedLanguageExtension { @@ -93,7 +95,6 @@ impl UnimplementedLanguageExtension { Self::ReadOnlyAndReadWriteStorageTextures => 6204, Self::Packed4x8IntegerDotProduct => 6445, Self::UnrestrictedPointerParameters => 5158, - Self::PointerCompositeAccess => 6192, } } } diff --git a/naga/src/front/wgsl/parse/mod.rs b/naga/src/front/wgsl/parse/mod.rs index e4c8a72cdd..3e8ff789ed 100644 --- a/naga/src/front/wgsl/parse/mod.rs +++ b/naga/src/front/wgsl/parse/mod.rs @@ -261,15 +261,20 @@ impl<'a> BindingParser<'a> { pub struct Parser { rules: Vec<(Rule, usize)>, + recursion_depth: u32, } impl Parser { pub const fn new() -> Self { - Parser { rules: Vec::new() } + Parser { + rules: Vec::new(), + recursion_depth: 0, + } } fn reset(&mut self) { self.rules.clear(); + self.recursion_depth = 0; } fn push_rule_span(&mut self, rule: Rule, lexer: &mut Lexer<'_>) { @@ -296,6 +301,19 @@ impl Parser { ) } + fn track_recursion<'a, F, R>(&mut self, f: F) -> Result> + where + F: FnOnce(&mut Self) -> Result>, + { + self.recursion_depth += 1; + if self.recursion_depth >= 256 { + return Err(Error::Internal("Parser recursion limit exceeded")); + } + let ret = f(self); + self.recursion_depth -= 1; + ret + } + fn switch_value<'a>( &mut self, lexer: &mut Lexer<'a>, @@ -860,58 +878,60 @@ impl Parser { lexer: &mut Lexer<'a>, ctx: &mut ExpressionContext<'a, '_, '_>, ) -> Result>, Error<'a>> { - self.push_rule_span(Rule::UnaryExpr, lexer); - //TODO: refactor this to avoid backing up - let expr = match lexer.peek().0 { - Token::Operation('-') => { - let _ = lexer.next(); - let expr = self.unary_expression(lexer, ctx)?; - let expr = ast::Expression::Unary { - op: crate::UnaryOperator::Negate, - expr, - }; - let span = self.peek_rule_span(lexer); - ctx.expressions.append(expr, span) - } - Token::Operation('!') => { - let _ = lexer.next(); - let expr = self.unary_expression(lexer, ctx)?; - let expr = ast::Expression::Unary { - op: crate::UnaryOperator::LogicalNot, - expr, - }; - let span = self.peek_rule_span(lexer); - ctx.expressions.append(expr, span) - } - Token::Operation('~') => { - let _ = lexer.next(); - let expr = self.unary_expression(lexer, ctx)?; - let expr = ast::Expression::Unary { - op: crate::UnaryOperator::BitwiseNot, - expr, - }; - let span = self.peek_rule_span(lexer); - ctx.expressions.append(expr, span) - } - Token::Operation('*') => { - let _ = lexer.next(); - let expr = self.unary_expression(lexer, ctx)?; - let expr = ast::Expression::Deref(expr); - let span = self.peek_rule_span(lexer); - ctx.expressions.append(expr, span) - } - Token::Operation('&') => { - let _ = lexer.next(); - let expr = self.unary_expression(lexer, ctx)?; - let expr = ast::Expression::AddrOf(expr); - let span = self.peek_rule_span(lexer); - ctx.expressions.append(expr, span) - } - _ => self.singular_expression(lexer, ctx)?, - }; + self.track_recursion(|this| { + this.push_rule_span(Rule::UnaryExpr, lexer); + //TODO: refactor this to avoid backing up + let expr = match lexer.peek().0 { + Token::Operation('-') => { + let _ = lexer.next(); + let expr = this.unary_expression(lexer, ctx)?; + let expr = ast::Expression::Unary { + op: 
crate::UnaryOperator::Negate, + expr, + }; + let span = this.peek_rule_span(lexer); + ctx.expressions.append(expr, span) + } + Token::Operation('!') => { + let _ = lexer.next(); + let expr = this.unary_expression(lexer, ctx)?; + let expr = ast::Expression::Unary { + op: crate::UnaryOperator::LogicalNot, + expr, + }; + let span = this.peek_rule_span(lexer); + ctx.expressions.append(expr, span) + } + Token::Operation('~') => { + let _ = lexer.next(); + let expr = this.unary_expression(lexer, ctx)?; + let expr = ast::Expression::Unary { + op: crate::UnaryOperator::BitwiseNot, + expr, + }; + let span = this.peek_rule_span(lexer); + ctx.expressions.append(expr, span) + } + Token::Operation('*') => { + let _ = lexer.next(); + let expr = this.unary_expression(lexer, ctx)?; + let expr = ast::Expression::Deref(expr); + let span = this.peek_rule_span(lexer); + ctx.expressions.append(expr, span) + } + Token::Operation('&') => { + let _ = lexer.next(); + let expr = this.unary_expression(lexer, ctx)?; + let expr = ast::Expression::AddrOf(expr); + let span = this.peek_rule_span(lexer); + ctx.expressions.append(expr, span) + } + _ => this.singular_expression(lexer, ctx)?, + }; - self.pop_rule_span(lexer); - Ok(expr) + this.pop_rule_span(lexer); + Ok(expr) + }) } /// Parse a `singular_expression`. @@ -1650,25 +1670,27 @@ impl Parser { lexer: &mut Lexer<'a>, ctx: &mut ExpressionContext<'a, '_, '_>, ) -> Result>, Error<'a>> { - self.push_rule_span(Rule::TypeDecl, lexer); - - let (name, span) = lexer.next_ident_with_span()?; - - let ty = match self.type_decl_impl(lexer, name, ctx)? { - Some(ty) => ty, - None => { - ctx.unresolved.insert(ast::Dependency { - ident: name, - usage: span, - }); - ast::Type::User(ast::Ident { name, span }) - } - }; + self.track_recursion(|this| { + this.push_rule_span(Rule::TypeDecl, lexer); + + let (name, span) = lexer.next_ident_with_span()?; + + let ty = match this.type_decl_impl(lexer, name, ctx)? 
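The `track_recursion` guard exists because deeply nested source could previously overflow the host stack while parsing; a sketch of the kind of WGSL input it now rejects cleanly (the nesting shown here is illustrative, real inputs would need hundreds of levels):

    fn pathological() -> i32 {
        // Imagine hundreds of nested parentheses, nested unary `!` operators,
        // or deeply nested array types: parsing now stops with
        // "Parser recursion limit exceeded" once 256 nested rules are reached,
        // instead of risking a stack overflow.
        return ((((((((1))))))));
    }
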
{ + Some(ty) => ty, + None => { + ctx.unresolved.insert(ast::Dependency { + ident: name, + usage: span, + }); + ast::Type::User(ast::Ident { name, span }) + } + }; - self.pop_rule_span(lexer); + this.pop_rule_span(lexer); - let handle = ctx.types.append(ty, Span::UNDEFINED); - Ok(handle) + let handle = ctx.types.append(ty, Span::UNDEFINED); + Ok(handle) + }) } fn assignment_op_and_rhs<'a>( @@ -1812,291 +1834,235 @@ impl Parser { block: &mut ast::Block<'a>, brace_nesting_level: u8, ) -> Result<(), Error<'a>> { - self.push_rule_span(Rule::Statement, lexer); - match lexer.peek() { - (Token::Separator(';'), _) => { - let _ = lexer.next(); - self.pop_rule_span(lexer); - } - (Token::Paren('{') | Token::Attribute, _) => { - let (inner, span) = self.block(lexer, ctx, brace_nesting_level)?; - block.stmts.push(ast::Statement { - kind: ast::StatementKind::Block(inner), - span, - }); - self.pop_rule_span(lexer); - } - (Token::Word(word), _) => { - let kind = match word { - "_" => { - let _ = lexer.next(); - lexer.expect(Token::Operation('='))?; - let expr = self.general_expression(lexer, ctx)?; - lexer.expect(Token::Separator(';'))?; + self.track_recursion(|this| { + this.push_rule_span(Rule::Statement, lexer); + match lexer.peek() { + (Token::Separator(';'), _) => { + let _ = lexer.next(); + this.pop_rule_span(lexer); + } + (Token::Paren('{') | Token::Attribute, _) => { + let (inner, span) = this.block(lexer, ctx, brace_nesting_level)?; + block.stmts.push(ast::Statement { + kind: ast::StatementKind::Block(inner), + span, + }); + this.pop_rule_span(lexer); + } + (Token::Word(word), _) => { + let kind = match word { + "_" => { + let _ = lexer.next(); + lexer.expect(Token::Operation('='))?; + let expr = this.general_expression(lexer, ctx)?; + lexer.expect(Token::Separator(';'))?; + + ast::StatementKind::Phony(expr) + } + "let" => { + let _ = lexer.next(); + let name = lexer.next_ident()?; + + let given_ty = if lexer.skip(Token::Separator(':')) { + let ty = this.type_decl(lexer, ctx)?; + Some(ty) + } else { + None + }; + lexer.expect(Token::Operation('='))?; + let expr_id = this.general_expression(lexer, ctx)?; + lexer.expect(Token::Separator(';'))?; + + let handle = ctx.declare_local(name)?; + ast::StatementKind::LocalDecl(ast::LocalDecl::Let(ast::Let { + name, + ty: given_ty, + init: expr_id, + handle, + })) + } + "const" => { + let _ = lexer.next(); + let name = lexer.next_ident()?; + + let given_ty = if lexer.skip(Token::Separator(':')) { + let ty = this.type_decl(lexer, ctx)?; + Some(ty) + } else { + None + }; + lexer.expect(Token::Operation('='))?; + let expr_id = this.general_expression(lexer, ctx)?; + lexer.expect(Token::Separator(';'))?; + + let handle = ctx.declare_local(name)?; + ast::StatementKind::LocalDecl(ast::LocalDecl::Const(ast::LocalConst { + name, + ty: given_ty, + init: expr_id, + handle, + })) + } + "var" => { + let _ = lexer.next(); + + let name = lexer.next_ident()?; + let ty = if lexer.skip(Token::Separator(':')) { + let ty = this.type_decl(lexer, ctx)?; + Some(ty) + } else { + None + }; - ast::StatementKind::Phony(expr) - } - "let" => { - let _ = lexer.next(); - let name = lexer.next_ident()?; - - let given_ty = if lexer.skip(Token::Separator(':')) { - let ty = self.type_decl(lexer, ctx)?; - Some(ty) - } else { - None - }; - lexer.expect(Token::Operation('='))?; - let expr_id = self.general_expression(lexer, ctx)?; - lexer.expect(Token::Separator(';'))?; + let init = if lexer.skip(Token::Operation('=')) { + let init = this.general_expression(lexer, ctx)?; + Some(init) + } else { 
+ None + }; - let handle = ctx.declare_local(name)?; - ast::StatementKind::LocalDecl(ast::LocalDecl::Let(ast::Let { - name, - ty: given_ty, - init: expr_id, - handle, - })) - } - "const" => { - let _ = lexer.next(); - let name = lexer.next_ident()?; - - let given_ty = if lexer.skip(Token::Separator(':')) { - let ty = self.type_decl(lexer, ctx)?; - Some(ty) - } else { - None - }; - lexer.expect(Token::Operation('='))?; - let expr_id = self.general_expression(lexer, ctx)?; - lexer.expect(Token::Separator(';'))?; + lexer.expect(Token::Separator(';'))?; - let handle = ctx.declare_local(name)?; - ast::StatementKind::LocalDecl(ast::LocalDecl::Const(ast::LocalConst { - name, - ty: given_ty, - init: expr_id, - handle, - })) - } - "var" => { - let _ = lexer.next(); - - let name = lexer.next_ident()?; - let ty = if lexer.skip(Token::Separator(':')) { - let ty = self.type_decl(lexer, ctx)?; - Some(ty) - } else { - None - }; - - let init = if lexer.skip(Token::Operation('=')) { - let init = self.general_expression(lexer, ctx)?; - Some(init) - } else { - None - }; + let handle = ctx.declare_local(name)?; + ast::StatementKind::LocalDecl(ast::LocalDecl::Var(ast::LocalVariable { + name, + ty, + init, + handle, + })) + } + "return" => { + let _ = lexer.next(); + let value = if lexer.peek().0 != Token::Separator(';') { + let handle = this.general_expression(lexer, ctx)?; + Some(handle) + } else { + None + }; + lexer.expect(Token::Separator(';'))?; + ast::StatementKind::Return { value } + } + "if" => { + let _ = lexer.next(); + let condition = this.general_expression(lexer, ctx)?; - lexer.expect(Token::Separator(';'))?; + let accept = this.block(lexer, ctx, brace_nesting_level)?.0; - let handle = ctx.declare_local(name)?; - ast::StatementKind::LocalDecl(ast::LocalDecl::Var(ast::LocalVariable { - name, - ty, - init, - handle, - })) - } - "return" => { - let _ = lexer.next(); - let value = if lexer.peek().0 != Token::Separator(';') { - let handle = self.general_expression(lexer, ctx)?; - Some(handle) - } else { - None - }; - lexer.expect(Token::Separator(';'))?; - ast::StatementKind::Return { value } - } - "if" => { - let _ = lexer.next(); - let condition = self.general_expression(lexer, ctx)?; + let mut elsif_stack = Vec::new(); + let mut elseif_span_start = lexer.start_byte_offset(); + let mut reject = loop { + if !lexer.skip(Token::Word("else")) { + break ast::Block::default(); + } - let accept = self.block(lexer, ctx, brace_nesting_level)?.0; + if !lexer.skip(Token::Word("if")) { + // ... else { ... } + break this.block(lexer, ctx, brace_nesting_level)?.0; + } - let mut elsif_stack = Vec::new(); - let mut elseif_span_start = lexer.start_byte_offset(); - let mut reject = loop { - if !lexer.skip(Token::Word("else")) { - break ast::Block::default(); - } + // ... else if (...) { ... } + let other_condition = this.general_expression(lexer, ctx)?; + let other_block = this.block(lexer, ctx, brace_nesting_level)?; + elsif_stack.push((elseif_span_start, other_condition, other_block)); + elseif_span_start = lexer.start_byte_offset(); + }; - if !lexer.skip(Token::Word("if")) { - // ... else { ... 
} - break self.block(lexer, ctx, brace_nesting_level)?.0; + // reverse-fold the else-if blocks + //Note: we may consider uplifting this to the IR + for (other_span_start, other_cond, other_block) in + elsif_stack.into_iter().rev() + { + let sub_stmt = ast::StatementKind::If { + condition: other_cond, + accept: other_block.0, + reject, + }; + reject = ast::Block::default(); + let span = lexer.span_from(other_span_start); + reject.stmts.push(ast::Statement { + kind: sub_stmt, + span, + }) } - // ... else if (...) { ... } - let other_condition = self.general_expression(lexer, ctx)?; - let other_block = self.block(lexer, ctx, brace_nesting_level)?; - elsif_stack.push((elseif_span_start, other_condition, other_block)); - elseif_span_start = lexer.start_byte_offset(); - }; - - // reverse-fold the else-if blocks - //Note: we may consider uplifting this to the IR - for (other_span_start, other_cond, other_block) in - elsif_stack.into_iter().rev() - { - let sub_stmt = ast::StatementKind::If { - condition: other_cond, - accept: other_block.0, + ast::StatementKind::If { + condition, + accept, reject, - }; - reject = ast::Block::default(); - let span = lexer.span_from(other_span_start); - reject.stmts.push(ast::Statement { - kind: sub_stmt, - span, - }) - } - - ast::StatementKind::If { - condition, - accept, - reject, + } } - } - "switch" => { - let _ = lexer.next(); - let selector = self.general_expression(lexer, ctx)?; - let brace_span = lexer.expect_span(Token::Paren('{'))?; - let brace_nesting_level = - Self::increase_brace_nesting(brace_nesting_level, brace_span)?; - let mut cases = Vec::new(); - - loop { - // cases + default - match lexer.next() { - (Token::Word("case"), _) => { - // parse a list of values - let value = loop { - let value = self.switch_value(lexer, ctx)?; - if lexer.skip(Token::Separator(',')) { - if lexer.skip(Token::Separator(':')) { + "switch" => { + let _ = lexer.next(); + let selector = this.general_expression(lexer, ctx)?; + let brace_span = lexer.expect_span(Token::Paren('{'))?; + let brace_nesting_level = + Self::increase_brace_nesting(brace_nesting_level, brace_span)?; + let mut cases = Vec::new(); + + loop { + // cases + default + match lexer.next() { + (Token::Word("case"), _) => { + // parse a list of values + let value = loop { + let value = this.switch_value(lexer, ctx)?; + if lexer.skip(Token::Separator(',')) { + if lexer.skip(Token::Separator(':')) { + break value; + } + } else { + lexer.skip(Token::Separator(':')); break value; } - } else { - lexer.skip(Token::Separator(':')); - break value; - } + cases.push(ast::SwitchCase { + value, + body: ast::Block::default(), + fall_through: true, + }); + }; + + let body = this.block(lexer, ctx, brace_nesting_level)?.0; + cases.push(ast::SwitchCase { value, - body: ast::Block::default(), - fall_through: true, + body, + fall_through: false, }); - }; - - let body = self.block(lexer, ctx, brace_nesting_level)?.0; - - cases.push(ast::SwitchCase { - value, - body, - fall_through: false, - }); - } - (Token::Word("default"), _) => { - lexer.skip(Token::Separator(':')); - let body = self.block(lexer, ctx, brace_nesting_level)?.0; - cases.push(ast::SwitchCase { - value: ast::SwitchValue::Default, - body, - fall_through: false, - }); - } - (Token::Paren('}'), _) => break, - (_, span) => { - return Err(Error::Unexpected(span, ExpectedToken::SwitchItem)) + } + (Token::Word("default"), _) => { + lexer.skip(Token::Separator(':')); + let body = this.block(lexer, ctx, brace_nesting_level)?.0; + cases.push(ast::SwitchCase { + value: 
ast::SwitchValue::Default, + body, + fall_through: false, + }); + } + (Token::Paren('}'), _) => break, + (_, span) => { + return Err(Error::Unexpected( + span, + ExpectedToken::SwitchItem, + )) + } } } - } - - ast::StatementKind::Switch { selector, cases } - } - "loop" => self.r#loop(lexer, ctx, brace_nesting_level)?, - "while" => { - let _ = lexer.next(); - let mut body = ast::Block::default(); - - let (condition, span) = - lexer.capture_span(|lexer| self.general_expression(lexer, ctx))?; - let mut reject = ast::Block::default(); - reject.stmts.push(ast::Statement { - kind: ast::StatementKind::Break, - span, - }); - body.stmts.push(ast::Statement { - kind: ast::StatementKind::If { - condition, - accept: ast::Block::default(), - reject, - }, - span, - }); - - let (block, span) = self.block(lexer, ctx, brace_nesting_level)?; - body.stmts.push(ast::Statement { - kind: ast::StatementKind::Block(block), - span, - }); - - ast::StatementKind::Loop { - body, - continuing: ast::Block::default(), - break_if: None, + ast::StatementKind::Switch { selector, cases } } - } - "for" => { - let _ = lexer.next(); - lexer.expect(Token::Paren('('))?; - - ctx.local_table.push_scope(); - - if !lexer.skip(Token::Separator(';')) { - let num_statements = block.stmts.len(); - let (_, span) = { - let ctx = &mut *ctx; - let block = &mut *block; - lexer.capture_span(|lexer| { - self.statement(lexer, ctx, block, brace_nesting_level) - })? - }; - - if block.stmts.len() != num_statements { - match block.stmts.last().unwrap().kind { - ast::StatementKind::Call { .. } - | ast::StatementKind::Assign { .. } - | ast::StatementKind::LocalDecl(_) => {} - _ => return Err(Error::InvalidForInitializer(span)), - } - } - }; + "loop" => this.r#loop(lexer, ctx, brace_nesting_level)?, + "while" => { + let _ = lexer.next(); + let mut body = ast::Block::default(); - let mut body = ast::Block::default(); - if !lexer.skip(Token::Separator(';')) { let (condition, span) = - lexer.capture_span(|lexer| -> Result<_, Error<'_>> { - let condition = self.general_expression(lexer, ctx)?; - lexer.expect(Token::Separator(';'))?; - Ok(condition) - })?; + lexer.capture_span(|lexer| this.general_expression(lexer, ctx))?; let mut reject = ast::Block::default(); reject.stmts.push(ast::Statement { kind: ast::StatementKind::Break, span, }); + body.stmts.push(ast::Statement { kind: ast::StatementKind::If { condition, @@ -2105,88 +2071,149 @@ impl Parser { }, span, }); - }; - - let mut continuing = ast::Block::default(); - if !lexer.skip(Token::Paren(')')) { - self.function_call_or_assignment_statement( - lexer, - ctx, - &mut continuing, - )?; - lexer.expect(Token::Paren(')'))?; + + let (block, span) = this.block(lexer, ctx, brace_nesting_level)?; + body.stmts.push(ast::Statement { + kind: ast::StatementKind::Block(block), + span, + }); + + ast::StatementKind::Loop { + body, + continuing: ast::Block::default(), + break_if: None, + } } + "for" => { + let _ = lexer.next(); + lexer.expect(Token::Paren('('))?; + + ctx.local_table.push_scope(); + + if !lexer.skip(Token::Separator(';')) { + let num_statements = block.stmts.len(); + let (_, span) = { + let ctx = &mut *ctx; + let block = &mut *block; + lexer.capture_span(|lexer| { + this.statement(lexer, ctx, block, brace_nesting_level) + })? + }; + + if block.stmts.len() != num_statements { + match block.stmts.last().unwrap().kind { + ast::StatementKind::Call { .. } + | ast::StatementKind::Assign { .. 
} + | ast::StatementKind::LocalDecl(_) => {} + _ => return Err(Error::InvalidForInitializer(span)), + } + } + }; - let (block, span) = self.block(lexer, ctx, brace_nesting_level)?; - body.stmts.push(ast::Statement { - kind: ast::StatementKind::Block(block), - span, - }); + let mut body = ast::Block::default(); + if !lexer.skip(Token::Separator(';')) { + let (condition, span) = + lexer.capture_span(|lexer| -> Result<_, Error<'_>> { + let condition = this.general_expression(lexer, ctx)?; + lexer.expect(Token::Separator(';'))?; + Ok(condition) + })?; + let mut reject = ast::Block::default(); + reject.stmts.push(ast::Statement { + kind: ast::StatementKind::Break, + span, + }); + body.stmts.push(ast::Statement { + kind: ast::StatementKind::If { + condition, + accept: ast::Block::default(), + reject, + }, + span, + }); + }; + + let mut continuing = ast::Block::default(); + if !lexer.skip(Token::Paren(')')) { + this.function_call_or_assignment_statement( + lexer, + ctx, + &mut continuing, + )?; + lexer.expect(Token::Paren(')'))?; + } - ctx.local_table.pop_scope(); + let (block, span) = this.block(lexer, ctx, brace_nesting_level)?; + body.stmts.push(ast::Statement { + kind: ast::StatementKind::Block(block), + span, + }); - ast::StatementKind::Loop { - body, - continuing, - break_if: None, + ctx.local_table.pop_scope(); + + ast::StatementKind::Loop { + body, + continuing, + break_if: None, + } } - } - "break" => { - let (_, span) = lexer.next(); - // Check if the next token is an `if`, this indicates - // that the user tried to type out a `break if` which - // is illegal in this position. - let (peeked_token, peeked_span) = lexer.peek(); - if let Token::Word("if") = peeked_token { - let span = span.until(&peeked_span); - return Err(Error::InvalidBreakIf(span)); + "break" => { + let (_, span) = lexer.next(); + // Check if the next token is an `if`, this indicates + // that the user tried to type out a `break if` which + // is illegal in this position. 
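A small WGSL illustration (not from the patch) of the `break if` placement rule this check reports on:

    fn loop_with_break_if() {
        var i: i32 = 0;
        loop {
            i = i + 1;
            continuing {
                break if i >= 4; // `break if` is only valid inside a `continuing` block
            }
        }
        // Writing `break if i >= 4;` as an ordinary statement is rejected
        // with InvalidBreakIf.
    }
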
+ let (peeked_token, peeked_span) = lexer.peek(); + if let Token::Word("if") = peeked_token { + let span = span.until(&peeked_span); + return Err(Error::InvalidBreakIf(span)); + } + lexer.expect(Token::Separator(';'))?; + ast::StatementKind::Break } - lexer.expect(Token::Separator(';'))?; - ast::StatementKind::Break - } - "continue" => { - let _ = lexer.next(); - lexer.expect(Token::Separator(';'))?; - ast::StatementKind::Continue - } - "discard" => { - let _ = lexer.next(); - lexer.expect(Token::Separator(';'))?; - ast::StatementKind::Kill - } - // https://www.w3.org/TR/WGSL/#const-assert-statement - "const_assert" => { - let _ = lexer.next(); - // parentheses are optional - let paren = lexer.skip(Token::Paren('(')); + "continue" => { + let _ = lexer.next(); + lexer.expect(Token::Separator(';'))?; + ast::StatementKind::Continue + } + "discard" => { + let _ = lexer.next(); + lexer.expect(Token::Separator(';'))?; + ast::StatementKind::Kill + } + // https://www.w3.org/TR/WGSL/#const-assert-statement + "const_assert" => { + let _ = lexer.next(); + // parentheses are optional + let paren = lexer.skip(Token::Paren('(')); - let condition = self.general_expression(lexer, ctx)?; + let condition = this.general_expression(lexer, ctx)?; - if paren { - lexer.expect(Token::Paren(')'))?; + if paren { + lexer.expect(Token::Paren(')'))?; + } + lexer.expect(Token::Separator(';'))?; + ast::StatementKind::ConstAssert(condition) } - lexer.expect(Token::Separator(';'))?; - ast::StatementKind::ConstAssert(condition) - } - // assignment or a function call - _ => { - self.function_call_or_assignment_statement(lexer, ctx, block)?; - lexer.expect(Token::Separator(';'))?; - self.pop_rule_span(lexer); - return Ok(()); - } - }; + // assignment or a function call + _ => { + this.function_call_or_assignment_statement(lexer, ctx, block)?; + lexer.expect(Token::Separator(';'))?; + this.pop_rule_span(lexer); + return Ok(()); + } + }; - let span = self.pop_rule_span(lexer); - block.stmts.push(ast::Statement { kind, span }); - } - _ => { - self.assignment_statement(lexer, ctx, block)?; - lexer.expect(Token::Separator(';'))?; - self.pop_rule_span(lexer); + let span = this.pop_rule_span(lexer); + block.stmts.push(ast::Statement { kind, span }); + } + _ => { + this.assignment_statement(lexer, ctx, block)?; + lexer.expect(Token::Separator(';'))?; + this.pop_rule_span(lexer); + } } - } - Ok(()) + Ok(()) + }) } fn r#loop<'a>( @@ -2338,6 +2365,7 @@ impl Parser { &mut self, lexer: &mut Lexer<'a>, diagnostic_filter_leaf: Option>, + must_use: Option, out: &mut ast::TranslationUnit<'a>, dependencies: &mut FastIndexSet>, ) -> Result, Error<'a>> { @@ -2389,7 +2417,17 @@ impl Parser { let result = if lexer.skip(Token::Arrow) { let binding = self.varying_binding(lexer, &mut ctx)?; let ty = self.type_decl(lexer, &mut ctx)?; - Some(ast::FunctionResult { ty, binding }) + let must_use = must_use.is_some(); + Some(ast::FunctionResult { + ty, + binding, + must_use, + }) + } else if let Some(must_use) = must_use { + return Err(Error::FunctionMustUseReturnsVoid( + must_use, + self.peek_rule_span(lexer), + )); } else { None }; @@ -2463,6 +2501,8 @@ impl Parser { (ParsedAttribute::default(), ParsedAttribute::default()); let mut id = ParsedAttribute::default(); + let mut must_use: ParsedAttribute = ParsedAttribute::default(); + let mut dependencies = FastIndexSet::default(); let mut ctx = ExpressionContext { expressions: &mut out.expressions, @@ -2547,6 +2587,9 @@ impl Parser { }; early_depth_test.set(crate::EarlyDepthTest { conservative }, 
name_span)?; } + "must_use" => { + must_use.set(name_span, name_span)?; + } _ => return Err(Error::UnknownAttribute(name_span)), } } @@ -2652,8 +2695,14 @@ impl Parser { diagnostic_filters, out.diagnostic_filter_leaf, ); - let function = - self.function_decl(lexer, diagnostic_filter_leaf, out, &mut dependencies)?; + + let function = self.function_decl( + lexer, + diagnostic_filter_leaf, + must_use.value, + out, + &mut dependencies, + )?; Some(ast::GlobalDeclKind::Fn(ast::Function { entry_point: if let Some(stage) = stage.value { if stage == ShaderStage::Compute && workgroup_size.value.is_none() { diff --git a/naga/src/lib.rs b/naga/src/lib.rs index d03608fb5d..19f9cd96b0 100644 --- a/naga/src/lib.rs +++ b/naga/src/lib.rs @@ -283,9 +283,15 @@ pub const BOOL_WIDTH: Bytes = 1; pub const ABSTRACT_WIDTH: Bytes = 8; /// Hash map that is faster but not resilient to DoS attacks. -pub type FastHashMap = rustc_hash::FxHashMap; +/// (Similar to rustc_hash::FxHashMap but using hashbrown::HashMap instead of std::collections::HashMap.) +/// To construct a new instance: `FastHashMap::default()` +pub type FastHashMap = + hashbrown::HashMap>; + /// Hash set that is faster but not resilient to DoS attacks. -pub type FastHashSet = rustc_hash::FxHashSet; +/// (Similar to rustc_hash::FxHashSet but using hashbrown::HashSet instead of std::collections::HashMap.) +pub type FastHashSet = + hashbrown::HashSet>; /// Insertion-order-preserving hash set (`IndexSet`), but with the same /// hasher as `FastHashSet` (faster but not resilient to DoS attacks). @@ -325,6 +331,7 @@ pub(crate) type NamedExpressions = FastIndexMap, String>; pub struct EarlyDepthTest { pub conservative: Option, } + /// Enables adjusting depth without disabling early Z. /// /// To use in a shader: @@ -966,7 +973,7 @@ pub enum Binding { } /// Pipeline binding information for global resources. -#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +#[derive(Copy, Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] #[cfg_attr(feature = "serialize", derive(Serialize))] #[cfg_attr(feature = "deserialize", derive(Deserialize))] #[cfg_attr(feature = "arbitrary", derive(Arbitrary))] @@ -2184,18 +2191,29 @@ pub struct Function { pub local_variables: Arena, /// Expressions used inside this function. /// - /// If an [`Expression`] is in this arena, then its subexpressions are in this - /// arena too. In other words, every `Handle` in this arena - /// refers to an [`Expression`] in this arena too. The only way this arena - /// can refer to [`Module::global_expressions`] is indirectly, via - /// [`Constant`] and [`Override`] expressions, which hold handles for their - /// respective types. + /// Unless explicitly stated otherwise, if an [`Expression`] is in this + /// arena, then its subexpressions are in this arena too. In other words, + /// every `Handle` in this arena refers to an [`Expression`] in + /// this arena too. + /// + /// The main ways this arena refers to [`Module::global_expressions`] are: + /// + /// - [`Constant`], [`Override`], and [`GlobalVariable`] expressions hold + /// handles for their respective types, whose initializer expressions are + /// in [`Module::global_expressions`]. + /// + /// - Various expressions hold [`Type`] handles, and [`Type`]s may refer to + /// global expressions, for things like array lengths. + /// + /// - [`Expression::ImageSample::offset`] refers to an expression in + /// [`Module::global_expressions`]. /// /// An [`Expression`] must occur before all other [`Expression`]s that use /// its value. 
/// /// [`Constant`]: Expression::Constant /// [`Override`]: Expression::Override + /// [`GlobalVariable`]: Expression::GlobalVariable pub expressions: Arena, /// Map of expressions that have associated variable names pub named_expressions: NamedExpressions, @@ -2390,12 +2408,37 @@ pub enum RayQueryIntersection { /// Alternatively, you can load an existing shader using one of the [available front ends][front]. /// /// When finished, you can export modules using one of the [available backends][back]. +/// +/// ## Module arenas +/// +/// Most module contents are stored in [`Arena`]s. In a valid module, arena +/// elements only refer to prior arena elements. That is, whenever an element in +/// some `Arena` contains a `Handle` referring to another element the same +/// arena, the handle's referent always precedes the element containing the +/// handle. +/// +/// The elements of [`Module::types`] may refer to [`Expression`]s in +/// [`Module::global_expressions`], and those expressions may in turn refer back +/// to [`Type`]s in [`Module::types`]. In a valid module, there exists an order +/// in which all types and global expressions can be visited such that: +/// +/// - types and expressions are visited in the order in which they appear in +/// their arenas, and +/// +/// - every element refers only to previously visited elements. +/// +/// This implies that the graph of types and global expressions is acyclic. +/// (However, it is a stronger condition: there are cycle-free arrangements of +/// types and expressions for which an order like the one described above does +/// not exist. Modules arranged in such a way are not valid.) #[derive(Debug, Default, Clone)] #[cfg_attr(feature = "serialize", derive(Serialize))] #[cfg_attr(feature = "deserialize", derive(Deserialize))] #[cfg_attr(feature = "arbitrary", derive(Arbitrary))] pub struct Module { /// Arena for the types defined in this module. + /// + /// See the [`Module`] docs for more details about this field. pub types: UniqueArena, /// Dictionary of special type handles. pub special_types: SpecialTypes, @@ -2411,8 +2454,7 @@ pub struct Module { /// arena too. In other words, every `Handle` in this arena /// refers to an [`Expression`] in this arena too. /// - /// Each `Expression` must occur in the arena before any - /// `Expression` that uses its value. + /// See the [`Module`] docs for more details about this field. /// /// [Constant expressions]: index.html#constant-expressions /// [override expressions]: index.html#override-expressions diff --git a/naga/src/proc/constant_evaluator.rs b/naga/src/proc/constant_evaluator.rs index d98c024635..edc06b4436 100644 --- a/naga/src/proc/constant_evaluator.rs +++ b/naga/src/proc/constant_evaluator.rs @@ -1400,14 +1400,19 @@ impl<'a> ConstantEvaluator<'a> { /// [`Compose`]: Expression::Compose fn eval_zero_value_and_splat( &mut self, - expr: Handle, + mut expr: Handle, span: Span, ) -> Result, ConstantEvaluatorError> { - match self.expressions[expr] { - Expression::ZeroValue(ty) => self.eval_zero_value_impl(ty, span), - Expression::Splat { size, value } => self.splat(value, size, span), - _ => Ok(expr), + // The result of the splat() for a Splat of a scalar ZeroValue is a + // vector ZeroValue, so we must call eval_zero_value_impl() after + // splat() in order to ensure we have no ZeroValues remaining. 
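One plausible WGSL constant expression that exercises this path (illustrative; the unit test added below constructs the same shape of IR directly):

    // `vec2<f32>(f32())` lowers to a Splat whose operand is a ZeroValue, so the
    // constant evaluator must fold the splat first and then the remaining
    // zero-value before evaluating the addition.
    const v: vec2<f32> = vec2<f32>(f32()) + vec2<f32>(5.0);
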
+ if let Expression::Splat { size, value } = self.expressions[expr] { + expr = self.splat(value, size, span)?; } + if let Expression::ZeroValue(ty) = self.expressions[expr] { + expr = self.eval_zero_value_impl(ty, span)?; + } + Ok(expr) } /// Lower [`ZeroValue`] expressions to [`Literal`] and [`Compose`] expressions. @@ -2978,4 +2983,84 @@ mod tests { panic!("unexpected evaluation result") } } + + #[test] + fn splat_of_zero_value() { + let mut types = UniqueArena::new(); + let constants = Arena::new(); + let overrides = Arena::new(); + let mut global_expressions = Arena::new(); + + let f32_ty = types.insert( + Type { + name: None, + inner: TypeInner::Scalar(crate::Scalar::F32), + }, + Default::default(), + ); + + let vec2_f32_ty = types.insert( + Type { + name: None, + inner: TypeInner::Vector { + size: VectorSize::Bi, + scalar: crate::Scalar::F32, + }, + }, + Default::default(), + ); + + let five = + global_expressions.append(Expression::Literal(Literal::F32(5.0)), Default::default()); + let five_splat = global_expressions.append( + Expression::Splat { + size: VectorSize::Bi, + value: five, + }, + Default::default(), + ); + let zero = global_expressions.append(Expression::ZeroValue(f32_ty), Default::default()); + let zero_splat = global_expressions.append( + Expression::Splat { + size: VectorSize::Bi, + value: zero, + }, + Default::default(), + ); + + let expression_kind_tracker = &mut ExpressionKindTracker::from_arena(&global_expressions); + let mut solver = ConstantEvaluator { + behavior: Behavior::Wgsl(WgslRestrictions::Const(None)), + types: &mut types, + constants: &constants, + overrides: &overrides, + expressions: &mut global_expressions, + expression_kind_tracker, + }; + + let solved_add = solver + .try_eval_and_append( + Expression::Binary { + op: crate::BinaryOperator::Add, + left: zero_splat, + right: five_splat, + }, + Default::default(), + ) + .unwrap(); + + let pass = match global_expressions[solved_add] { + Expression::Compose { ty, ref components } => { + ty == vec2_f32_ty + && components.iter().all(|&component| { + let component = &global_expressions[component]; + matches!(*component, Expression::Literal(Literal::F32(5.0))) + }) + } + _ => false, + }; + if !pass { + panic!("unexpected evaluation result") + } + } } diff --git a/naga/src/proc/mod.rs b/naga/src/proc/mod.rs index bd563eb1e4..a6a19f70ed 100644 --- a/naga/src/proc/mod.rs +++ b/naga/src/proc/mod.rs @@ -8,6 +8,7 @@ pub mod index; mod layouter; mod namer; mod terminator; +mod type_methods; mod typifier; pub use constant_evaluator::{ @@ -74,91 +75,6 @@ impl From for super::Scalar { } } -impl super::ScalarKind { - pub const fn is_numeric(self) -> bool { - match self { - crate::ScalarKind::Sint - | crate::ScalarKind::Uint - | crate::ScalarKind::Float - | crate::ScalarKind::AbstractInt - | crate::ScalarKind::AbstractFloat => true, - crate::ScalarKind::Bool => false, - } - } -} - -impl super::Scalar { - pub const I32: Self = Self { - kind: crate::ScalarKind::Sint, - width: 4, - }; - pub const U32: Self = Self { - kind: crate::ScalarKind::Uint, - width: 4, - }; - pub const F32: Self = Self { - kind: crate::ScalarKind::Float, - width: 4, - }; - pub const F64: Self = Self { - kind: crate::ScalarKind::Float, - width: 8, - }; - pub const I64: Self = Self { - kind: crate::ScalarKind::Sint, - width: 8, - }; - pub const U64: Self = Self { - kind: crate::ScalarKind::Uint, - width: 8, - }; - pub const BOOL: Self = Self { - kind: crate::ScalarKind::Bool, - width: crate::BOOL_WIDTH, - }; - pub const ABSTRACT_INT: Self = Self { - 
kind: crate::ScalarKind::AbstractInt, - width: crate::ABSTRACT_WIDTH, - }; - pub const ABSTRACT_FLOAT: Self = Self { - kind: crate::ScalarKind::AbstractFloat, - width: crate::ABSTRACT_WIDTH, - }; - - pub const fn is_abstract(self) -> bool { - match self.kind { - crate::ScalarKind::AbstractInt | crate::ScalarKind::AbstractFloat => true, - crate::ScalarKind::Sint - | crate::ScalarKind::Uint - | crate::ScalarKind::Float - | crate::ScalarKind::Bool => false, - } - } - - /// Construct a float `Scalar` with the given width. - /// - /// This is especially common when dealing with - /// `TypeInner::Matrix`, where the scalar kind is implicit. - pub const fn float(width: crate::Bytes) -> Self { - Self { - kind: crate::ScalarKind::Float, - width, - } - } - - pub const fn to_inner_scalar(self) -> crate::TypeInner { - crate::TypeInner::Scalar(self) - } - - pub const fn to_inner_vector(self, size: crate::VectorSize) -> crate::TypeInner { - crate::TypeInner::Vector { size, scalar: self } - } - - pub const fn to_inner_atomic(self) -> crate::TypeInner { - crate::TypeInner::Atomic(self) - } -} - #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum HashableLiteral { F64(u64), @@ -240,176 +156,6 @@ impl crate::Literal { } } -pub const POINTER_SPAN: u32 = 4; - -impl super::TypeInner { - /// Return the scalar type of `self`. - /// - /// If `inner` is a scalar, vector, or matrix type, return - /// its scalar type. Otherwise, return `None`. - pub const fn scalar(&self) -> Option { - use crate::TypeInner as Ti; - match *self { - Ti::Scalar(scalar) | Ti::Vector { scalar, .. } => Some(scalar), - Ti::Matrix { scalar, .. } => Some(scalar), - _ => None, - } - } - - pub fn scalar_kind(&self) -> Option { - self.scalar().map(|scalar| scalar.kind) - } - - /// Returns the scalar width in bytes - pub fn scalar_width(&self) -> Option { - self.scalar().map(|scalar| scalar.width) - } - - pub const fn pointer_space(&self) -> Option { - match *self { - Self::Pointer { space, .. } => Some(space), - Self::ValuePointer { space, .. } => Some(space), - _ => None, - } - } - - pub fn is_atomic_pointer(&self, types: &crate::UniqueArena) -> bool { - match *self { - crate::TypeInner::Pointer { base, .. } => match types[base].inner { - crate::TypeInner::Atomic { .. } => true, - _ => false, - }, - _ => false, - } - } - - /// Get the size of this type. - pub fn size(&self, _gctx: GlobalCtx) -> u32 { - match *self { - Self::Scalar(scalar) | Self::Atomic(scalar) => scalar.width as u32, - Self::Vector { size, scalar } => size as u32 * scalar.width as u32, - // matrices are treated as arrays of aligned columns - Self::Matrix { - columns, - rows, - scalar, - } => Alignment::from(rows) * scalar.width as u32 * columns as u32, - Self::Pointer { .. } | Self::ValuePointer { .. } => POINTER_SPAN, - Self::Array { - base: _, - size, - stride, - } => { - let count = match size { - super::ArraySize::Constant(count) => count.get(), - // any struct member or array element needing a size at pipeline-creation time - // must have a creation-fixed footprint - super::ArraySize::Pending(_) => 0, - // A dynamically-sized array has to have at least one element - super::ArraySize::Dynamic => 1, - }; - count * stride - } - Self::Struct { span, .. } => span, - Self::Image { .. } - | Self::Sampler { .. } - | Self::AccelerationStructure { .. } - | Self::RayQuery { .. } - | Self::BindingArray { .. } => 0, - } - } - - /// Return the canonical form of `self`, or `None` if it's already in - /// canonical form. 
- /// - /// Certain types have multiple representations in `TypeInner`. This - /// function converts all forms of equivalent types to a single - /// representative of their class, so that simply applying `Eq` to the - /// result indicates whether the types are equivalent, as far as Naga IR is - /// concerned. - pub fn canonical_form( - &self, - types: &crate::UniqueArena, - ) -> Option { - use crate::TypeInner as Ti; - match *self { - Ti::Pointer { base, space } => match types[base].inner { - Ti::Scalar(scalar) => Some(Ti::ValuePointer { - size: None, - scalar, - space, - }), - Ti::Vector { size, scalar } => Some(Ti::ValuePointer { - size: Some(size), - scalar, - space, - }), - _ => None, - }, - _ => None, - } - } - - /// Compare `self` and `rhs` as types. - /// - /// This is mostly the same as `::eq`, but it treats - /// `ValuePointer` and `Pointer` types as equivalent. - /// - /// When you know that one side of the comparison is never a pointer, it's - /// fine to not bother with canonicalization, and just compare `TypeInner` - /// values with `==`. - pub fn equivalent( - &self, - rhs: &crate::TypeInner, - types: &crate::UniqueArena, - ) -> bool { - let left = self.canonical_form(types); - let right = rhs.canonical_form(types); - left.as_ref().unwrap_or(self) == right.as_ref().unwrap_or(rhs) - } - - pub fn is_dynamically_sized(&self, types: &crate::UniqueArena) -> bool { - use crate::TypeInner as Ti; - match *self { - Ti::Array { size, .. } => size == crate::ArraySize::Dynamic, - Ti::Struct { ref members, .. } => members - .last() - .map(|last| types[last.ty].inner.is_dynamically_sized(types)) - .unwrap_or(false), - _ => false, - } - } - - pub fn components(&self) -> Option { - Some(match *self { - Self::Vector { size, .. } => size as u32, - Self::Matrix { columns, .. } => columns as u32, - Self::Array { - size: crate::ArraySize::Constant(len), - .. - } => len.get(), - Self::Struct { ref members, .. } => members.len() as u32, - _ => return None, - }) - } - - pub fn component_type(&self, index: usize) -> Option { - Some(match *self { - Self::Vector { scalar, .. } => TypeResolution::Value(crate::TypeInner::Scalar(scalar)), - Self::Matrix { rows, scalar, .. } => { - TypeResolution::Value(crate::TypeInner::Vector { size: rows, scalar }) - } - Self::Array { - base, - size: crate::ArraySize::Constant(_), - .. - } => TypeResolution::Handle(base), - Self::Struct { ref members, .. } => TypeResolution::Handle(members[index].ty), - _ => return None, - }) - } -} - impl super::AddressSpace { pub fn access(self) -> crate::StorageAccess { use crate::StorageAccess as Sa; diff --git a/naga/src/proc/terminator.rs b/naga/src/proc/terminator.rs index 19c37294ec..f22e61e6a6 100644 --- a/naga/src/proc/terminator.rs +++ b/naga/src/proc/terminator.rs @@ -28,9 +28,10 @@ pub fn ensure_block_returns(block: &mut crate::Block) { } } } - Some(&mut (S::Emit(_) | S::Break | S::Continue | S::Return { .. } | S::Kill)) => (), + Some(&mut (S::Break | S::Continue | S::Return { .. } | S::Kill)) => (), Some( - &mut (S::Loop { .. } + &mut (S::Emit(_) + | S::Loop { .. } | S::Store { .. } | S::ImageStore { .. } | S::Call { .. } diff --git a/naga/src/proc/type_methods.rs b/naga/src/proc/type_methods.rs new file mode 100644 index 0000000000..6276c6bcf1 --- /dev/null +++ b/naga/src/proc/type_methods.rs @@ -0,0 +1,262 @@ +//! Methods on [`TypeInner`], [`Scalar`], and [`ScalarKind`]. +//! +//! [`TypeInner`]: crate::TypeInner +//! [`Scalar`]: crate::Scalar +//! 
[`ScalarKind`]: crate::ScalarKind + +use super::TypeResolution; + +impl crate::ScalarKind { + pub const fn is_numeric(self) -> bool { + match self { + crate::ScalarKind::Sint + | crate::ScalarKind::Uint + | crate::ScalarKind::Float + | crate::ScalarKind::AbstractInt + | crate::ScalarKind::AbstractFloat => true, + crate::ScalarKind::Bool => false, + } + } +} + +impl crate::Scalar { + pub const I32: Self = Self { + kind: crate::ScalarKind::Sint, + width: 4, + }; + pub const U32: Self = Self { + kind: crate::ScalarKind::Uint, + width: 4, + }; + pub const F32: Self = Self { + kind: crate::ScalarKind::Float, + width: 4, + }; + pub const F64: Self = Self { + kind: crate::ScalarKind::Float, + width: 8, + }; + pub const I64: Self = Self { + kind: crate::ScalarKind::Sint, + width: 8, + }; + pub const U64: Self = Self { + kind: crate::ScalarKind::Uint, + width: 8, + }; + pub const BOOL: Self = Self { + kind: crate::ScalarKind::Bool, + width: crate::BOOL_WIDTH, + }; + pub const ABSTRACT_INT: Self = Self { + kind: crate::ScalarKind::AbstractInt, + width: crate::ABSTRACT_WIDTH, + }; + pub const ABSTRACT_FLOAT: Self = Self { + kind: crate::ScalarKind::AbstractFloat, + width: crate::ABSTRACT_WIDTH, + }; + + pub const fn is_abstract(self) -> bool { + match self.kind { + crate::ScalarKind::AbstractInt | crate::ScalarKind::AbstractFloat => true, + crate::ScalarKind::Sint + | crate::ScalarKind::Uint + | crate::ScalarKind::Float + | crate::ScalarKind::Bool => false, + } + } + + /// Construct a float `Scalar` with the given width. + /// + /// This is especially common when dealing with + /// `TypeInner::Matrix`, where the scalar kind is implicit. + pub const fn float(width: crate::Bytes) -> Self { + Self { + kind: crate::ScalarKind::Float, + width, + } + } + + pub const fn to_inner_scalar(self) -> crate::TypeInner { + crate::TypeInner::Scalar(self) + } + + pub const fn to_inner_vector(self, size: crate::VectorSize) -> crate::TypeInner { + crate::TypeInner::Vector { size, scalar: self } + } + + pub const fn to_inner_atomic(self) -> crate::TypeInner { + crate::TypeInner::Atomic(self) + } +} + +const POINTER_SPAN: u32 = 4; + +impl crate::TypeInner { + /// Return the scalar type of `self`. + /// + /// If `inner` is a scalar, vector, or matrix type, return + /// its scalar type. Otherwise, return `None`. + pub const fn scalar(&self) -> Option { + use crate::TypeInner as Ti; + match *self { + Ti::Scalar(scalar) | Ti::Vector { scalar, .. } => Some(scalar), + Ti::Matrix { scalar, .. } => Some(scalar), + _ => None, + } + } + + pub fn scalar_kind(&self) -> Option { + self.scalar().map(|scalar| scalar.kind) + } + + /// Returns the scalar width in bytes + pub fn scalar_width(&self) -> Option { + self.scalar().map(|scalar| scalar.width) + } + + pub const fn pointer_space(&self) -> Option { + match *self { + Self::Pointer { space, .. } => Some(space), + Self::ValuePointer { space, .. } => Some(space), + _ => None, + } + } + + pub fn is_atomic_pointer(&self, types: &crate::UniqueArena) -> bool { + match *self { + crate::TypeInner::Pointer { base, .. } => match types[base].inner { + crate::TypeInner::Atomic { .. } => true, + _ => false, + }, + _ => false, + } + } + + /// Get the size of this type. 
+ pub fn size(&self, _gctx: super::GlobalCtx) -> u32 { + match *self { + Self::Scalar(scalar) | Self::Atomic(scalar) => scalar.width as u32, + Self::Vector { size, scalar } => size as u32 * scalar.width as u32, + // matrices are treated as arrays of aligned columns + Self::Matrix { + columns, + rows, + scalar, + } => super::Alignment::from(rows) * scalar.width as u32 * columns as u32, + Self::Pointer { .. } | Self::ValuePointer { .. } => POINTER_SPAN, + Self::Array { + base: _, + size, + stride, + } => { + let count = match size { + crate::ArraySize::Constant(count) => count.get(), + // any struct member or array element needing a size at pipeline-creation time + // must have a creation-fixed footprint + crate::ArraySize::Pending(_) => 0, + // A dynamically-sized array has to have at least one element + crate::ArraySize::Dynamic => 1, + }; + count * stride + } + Self::Struct { span, .. } => span, + Self::Image { .. } + | Self::Sampler { .. } + | Self::AccelerationStructure { .. } + | Self::RayQuery { .. } + | Self::BindingArray { .. } => 0, + } + } + + /// Return the canonical form of `self`, or `None` if it's already in + /// canonical form. + /// + /// Certain types have multiple representations in `TypeInner`. This + /// function converts all forms of equivalent types to a single + /// representative of their class, so that simply applying `Eq` to the + /// result indicates whether the types are equivalent, as far as Naga IR is + /// concerned. + pub fn canonical_form( + &self, + types: &crate::UniqueArena, + ) -> Option { + use crate::TypeInner as Ti; + match *self { + Ti::Pointer { base, space } => match types[base].inner { + Ti::Scalar(scalar) => Some(Ti::ValuePointer { + size: None, + scalar, + space, + }), + Ti::Vector { size, scalar } => Some(Ti::ValuePointer { + size: Some(size), + scalar, + space, + }), + _ => None, + }, + _ => None, + } + } + + /// Compare `self` and `rhs` as types. + /// + /// This is mostly the same as `::eq`, but it treats + /// `ValuePointer` and `Pointer` types as equivalent. + /// + /// When you know that one side of the comparison is never a pointer, it's + /// fine to not bother with canonicalization, and just compare `TypeInner` + /// values with `==`. + pub fn equivalent( + &self, + rhs: &crate::TypeInner, + types: &crate::UniqueArena, + ) -> bool { + let left = self.canonical_form(types); + let right = rhs.canonical_form(types); + left.as_ref().unwrap_or(self) == right.as_ref().unwrap_or(rhs) + } + + pub fn is_dynamically_sized(&self, types: &crate::UniqueArena) -> bool { + use crate::TypeInner as Ti; + match *self { + Ti::Array { size, .. } => size == crate::ArraySize::Dynamic, + Ti::Struct { ref members, .. } => members + .last() + .map(|last| types[last.ty].inner.is_dynamically_sized(types)) + .unwrap_or(false), + _ => false, + } + } + + pub fn components(&self) -> Option { + Some(match *self { + Self::Vector { size, .. } => size as u32, + Self::Matrix { columns, .. } => columns as u32, + Self::Array { + size: crate::ArraySize::Constant(len), + .. + } => len.get(), + Self::Struct { ref members, .. } => members.len() as u32, + _ => return None, + }) + } + + pub fn component_type(&self, index: usize) -> Option { + Some(match *self { + Self::Vector { scalar, .. } => TypeResolution::Value(crate::TypeInner::Scalar(scalar)), + Self::Matrix { rows, scalar, .. } => { + TypeResolution::Value(crate::TypeInner::Vector { size: rows, scalar }) + } + Self::Array { + base, + size: crate::ArraySize::Constant(_), + .. 
+ } => TypeResolution::Handle(base), + Self::Struct { ref members, .. } => TypeResolution::Handle(members[index].ty), + _ => return None, + }) + } +} diff --git a/naga/src/span.rs b/naga/src/span.rs index 7c1ce17dca..0256e19dc4 100644 --- a/naga/src/span.rs +++ b/naga/src/span.rs @@ -314,7 +314,9 @@ impl WithSpan { /// Convenience trait for [`Error`] to be able to apply spans to anything. pub(crate) trait AddSpan: Sized { + /// The returned output type. type Output; + /// See [`WithSpan::new`]. fn with_span(self) -> Self::Output; /// See [`WithSpan::with_span`]. @@ -325,6 +327,30 @@ pub(crate) trait AddSpan: Sized { fn with_span_handle>(self, handle: Handle, arena: &A) -> Self::Output; } +impl AddSpan for E { + type Output = WithSpan; + + fn with_span(self) -> WithSpan { + WithSpan::new(self) + } + + fn with_span_static(self, span: Span, description: &'static str) -> WithSpan { + WithSpan::new(self).with_span(span, description) + } + + fn with_span_context(self, span_context: SpanContext) -> WithSpan { + WithSpan::new(self).with_context(span_context) + } + + fn with_span_handle>( + self, + handle: Handle, + arena: &A, + ) -> WithSpan { + WithSpan::new(self).with_handle(handle, arena) + } +} + /// Trait abstracting over getting a span from an [`Arena`] or a [`UniqueArena`]. pub(crate) trait SpanProvider { fn get_span(&self, handle: Handle) -> Span; @@ -351,36 +377,12 @@ impl SpanProvider for UniqueArena { } } -impl AddSpan for E -where - E: Error, -{ - type Output = WithSpan; - fn with_span(self) -> WithSpan { - WithSpan::new(self) - } - - fn with_span_static(self, span: Span, description: &'static str) -> WithSpan { - WithSpan::new(self).with_span(span, description) - } - - fn with_span_context(self, span_context: SpanContext) -> WithSpan { - WithSpan::new(self).with_context(span_context) - } - - fn with_span_handle>( - self, - handle: Handle, - arena: &A, - ) -> WithSpan { - WithSpan::new(self).with_handle(handle, arena) - } -} - /// Convenience trait for [`Result`], adding a [`MapErrWithSpan::map_err_inner`] /// mapping to [`WithSpan::and_then`]. -pub trait MapErrWithSpan: Sized { +pub(crate) trait MapErrWithSpan: Sized { + /// The returned output type. 
type Output: Sized; + fn map_err_inner(self, func: F) -> Self::Output where F: FnOnce(E) -> WithSpan, @@ -389,6 +391,7 @@ pub trait MapErrWithSpan: Sized { impl MapErrWithSpan for Result> { type Output = Result>; + fn map_err_inner(self, func: F) -> Result> where F: FnOnce(E) -> WithSpan, diff --git a/naga/src/valid/function.rs b/naga/src/valid/function.rs index 289f674cbc..b2258d09b4 100644 --- a/naga/src/valid/function.rs +++ b/naga/src/valid/function.rs @@ -22,8 +22,6 @@ pub enum CallError { ResultAlreadyInScope(Handle), #[error("Result expression {0:?} is populated by multiple `Call` statements")] ResultAlreadyPopulated(Handle), - #[error("Result value is invalid")] - ResultValue(#[source] ExpressionError), #[error("Requires {required} arguments, but {seen} are provided")] ArgumentCount { required: usize, seen: usize }, #[error("Argument {index} value {seen_expression:?} doesn't match the type {required:?}")] @@ -139,8 +137,17 @@ pub enum FunctionError { LastCaseFallTrough, #[error("The pointer {0:?} doesn't relate to a valid destination for a store")] InvalidStorePointer(Handle), - #[error("The value {0:?} can not be stored")] - InvalidStoreValue(Handle), + #[error("Image store texture parameter type mismatch")] + InvalidStoreTexture { + actual: Handle, + actual_ty: crate::TypeInner, + }, + #[error("Image store value parameter type mismatch")] + InvalidStoreValue { + actual: Handle, + actual_ty: crate::TypeInner, + expected_ty: crate::TypeInner, + }, #[error("The type of {value:?} doesn't match the type stored in {pointer:?}")] InvalidStoreTypes { pointer: Handle, @@ -1016,8 +1023,15 @@ impl super::Validator { let value_ty = context.resolve_type(value, &self.valid_expression_set)?; match *value_ty { Ti::Image { .. } | Ti::Sampler { .. } => { - return Err(FunctionError::InvalidStoreValue(value) - .with_span_handle(value, context.expressions)); + return Err(FunctionError::InvalidStoreTexture { + actual: value, + actual_ty: value_ty.clone(), + } + .with_span_context(( + context.expressions.get_span(value), + format!("this value is of type {value_ty:?}"), + )) + .with_span(span, "expects a texture argument")); } _ => {} } @@ -1174,9 +1188,22 @@ impl super::Validator { // The value we're writing had better match the scalar type // for `image`'s format. - if *context.resolve_type(value, &self.valid_expression_set)? 
!= value_ty { - return Err(FunctionError::InvalidStoreValue(value) - .with_span_handle(value, context.expressions)); + let actual_value_ty = + context.resolve_type(value, &self.valid_expression_set)?; + if actual_value_ty != &value_ty { + return Err(FunctionError::InvalidStoreValue { + actual: value, + actual_ty: actual_value_ty.clone(), + expected_ty: value_ty.clone(), + } + .with_span_context(( + context.expressions.get_span(value), + format!("this value is of type {actual_value_ty:?}"), + )) + .with_span( + span, + format!("expects a value argument of type {value_ty:?}"), + )); } } S::Call { diff --git a/naga/src/valid/handles.rs b/naga/src/valid/handles.rs index 285a90ae78..d23435c167 100644 --- a/naga/src/valid/handles.rs +++ b/naga/src/valid/handles.rs @@ -139,13 +139,13 @@ impl super::Validator { validate_const_expr(init)?; } - for (_handle, override_) in overrides.iter() { + for (_handle, r#override) in overrides.iter() { let &crate::Override { name: _, id: _, ty, init, - } = override_; + } = r#override; validate_type(ty)?; if let Some(init_expr) = init { validate_const_expr(init_expr)?; @@ -323,9 +323,9 @@ impl super::Validator { crate::PendingArraySize::Expression(expr) => Some(expr), crate::PendingArraySize::Override(h) => { Self::validate_override_handle(h, overrides)?; - let override_ = &overrides[h]; - handle.check_dep(override_.ty)?; - override_.init + let r#override = &overrides[h]; + handle.check_dep(r#override.ty)?; + r#override.init } }, crate::ArraySize::Constant(_) | crate::ArraySize::Dynamic => None, @@ -363,9 +363,9 @@ impl super::Validator { handle.check_dep(constants[constant].init)?; None } - crate::Expression::Override(override_) => { - validate_override(override_)?; - if let Some(init) = overrides[override_].init { + crate::Expression::Override(r#override) => { + validate_override(r#override)?; + if let Some(init) = overrides[r#override].init { handle.check_dep(init)?; } None @@ -416,8 +416,8 @@ impl super::Validator { crate::Expression::Constant(constant) => { validate_constant(constant)?; } - crate::Expression::Override(override_) => { - validate_override(override_)?; + crate::Expression::Override(r#override) => { + validate_override(r#override)?; } crate::Expression::ZeroValue(ty) => { validate_type(ty)?; diff --git a/naga/src/valid/interface.rs b/naga/src/valid/interface.rs index f04171ed22..100372aa94 100644 --- a/naga/src/valid/interface.rs +++ b/naga/src/valid/interface.rs @@ -778,7 +778,7 @@ impl super::Validator { } if let Some(ref bind) = var.binding { - if !self.ep_resource_bindings.insert(bind.clone()) { + if !self.ep_resource_bindings.insert(*bind) { if self.flags.contains(super::ValidationFlags::BINDINGS) { return Err(EntryPointError::BindingCollision(var_handle) .with_span_handle(var_handle, &module.global_variables)); diff --git a/naga/src/valid/mod.rs b/naga/src/valid/mod.rs index 92e0bcdc98..58bec08271 100644 --- a/naga/src/valid/mod.rs +++ b/naga/src/valid/mod.rs @@ -688,12 +688,12 @@ impl Validator { })? 
} - for (handle, override_) in module.overrides.iter() { + for (handle, r#override) in module.overrides.iter() { self.validate_override(handle, module.to_ctx(), &mod_info) .map_err(|source| { ValidationError::Override { handle, - name: override_.name.clone().unwrap_or_default(), + name: r#override.name.clone().unwrap_or_default(), source, } .with_span_handle(handle, &module.overrides) diff --git a/naga/tests/in/abstract-types-return.wgsl b/naga/tests/in/abstract-types-return.wgsl new file mode 100644 index 0000000000..da77c129a0 --- /dev/null +++ b/naga/tests/in/abstract-types-return.wgsl @@ -0,0 +1,26 @@ +@compute @workgroup_size(1) +fn main() {} + +fn return_i32_ai() -> i32 { + return 1; +} + +fn return_u32_ai() -> u32 { + return 1; +} + +fn return_f32_ai() -> f32 { + return 1; +} + +fn return_f32_af() -> f32 { + return 1.0; +} + +fn return_vec2f32_ai() -> vec2<f32> { + return vec2(1); +} + +fn return_arrf32_ai() -> array<f32, 4> { + return array(1, 1, 1, 1); +} diff --git a/naga/tests/in/access.wgsl b/naga/tests/in/access.wgsl index 61156172a5..2ad53b6134 100644 --- a/naga/tests/in/access.wgsl +++ b/naga/tests/in/access.wgsl @@ -198,3 +198,47 @@ fn assign_to_ptr_components() { assign_to_arg_ptr_array_element(&a1); fetch_arg_ptr_array_element(&a1); } + +fn index_ptr(value: bool) -> bool { + var a = array(value); + let p = &a; + return p[0]; +} + +struct S { m: i32 }; + +fn member_ptr() -> i32 { + var s: S = S(42); + let p = &s; + return p.m; +} + +struct Inner { delicious: i32 } + +struct Outer { om_nom_nom: Inner, thing: u32 } + +fn let_members_of_members() -> i32 { + let thing = Outer(); + + let inner = thing.om_nom_nom; + let delishus = inner.delicious; + + if (thing.thing != u32(delishus)) { + // LOL + } + + return thing.om_nom_nom.delicious; +} + +fn var_members_of_members() -> i32 { + var thing = Outer(); + + var inner = thing.om_nom_nom; + var delishus = inner.delicious; + + if (thing.thing != u32(delishus)) { + // LOL + } + + return thing.om_nom_nom.delicious; +} diff --git a/naga/tests/in/array-in-function-return-type.wgsl b/naga/tests/in/array-in-function-return-type.wgsl index 21e2012e78..02cc1d5313 100644 --- a/naga/tests/in/array-in-function-return-type.wgsl +++ b/naga/tests/in/array-in-function-return-type.wgsl @@ -2,8 +2,12 @@ fn ret_array() -> array<f32, 2> { return array<f32, 2>(1.0, 2.0); } +fn ret_array_array() -> array<array<f32, 2>, 3> { + return array<array<f32, 2>, 3>(ret_array(), ret_array(), ret_array()); +} + @fragment fn main() -> @location(0) vec4<f32> { - let a = ret_array(); - return vec4<f32>(a[0], a[1], 0.0, 1.0); + let a = ret_array_array(); + return vec4<f32>(a[0][0], a[0][1], 0.0, 1.0); } diff --git a/naga/tests/in/bounds-check-dynamic-buffer.param.ron b/naga/tests/in/bounds-check-dynamic-buffer.param.ron new file mode 100644 index 0000000000..8186d536fe --- /dev/null +++ b/naga/tests/in/bounds-check-dynamic-buffer.param.ron @@ -0,0 +1,17 @@ +( + hlsl: ( + binding_map: { + (group: 0, binding: 0): (space: 0, register: 0), + (group: 0, binding: 1): (space: 0, register: 1), + (group: 0, binding: 2): (space: 0, register: 0, restrict_indexing: true), + (group: 0, binding: 3): (space: 0, register: 2, dynamic_storage_buffer_offsets_index: Some(0)), + (group: 0, binding: 4): (space: 0, register: 3, dynamic_storage_buffer_offsets_index: Some(1)), + (group: 1, binding: 0): (space: 0, register: 4, dynamic_storage_buffer_offsets_index: Some(0)), + }, + dynamic_storage_buffer_offsets_targets: { + 0: (space: 0, register: 1, size: 2), + 1: (space: 0, register: 2, size: 1), + }, + restrict_indexing: true + ), +) \ No newline at end of file
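The blanket `impl<E> AddSpan for E` in the span.rs hunk earlier in this patch is what the reworked `textureStore` diagnostics in valid/function.rs lean on: any error value can be lifted into a `WithSpan` and annotated with labeled source spans before being returned. The sketch below is a self-contained illustration of that pattern; `Span`, `WithSpan`, `AddSpan`, and `FunctionError` here are simplified stand-ins for illustration only, not naga's actual definitions.

// Sketch of the span-labelling pattern used by the validator changes above.
// All types in this snippet are reduced placeholders, not naga's real types.
#[derive(Clone, Copy, Debug)]
struct Span {
    start: u32,
    end: u32,
}

#[derive(Debug)]
struct WithSpan<E> {
    inner: E,
    spans: Vec<(Span, String)>,
}

impl<E> WithSpan<E> {
    fn new(inner: E) -> Self {
        Self { inner, spans: Vec::new() }
    }

    // Attach one labeled span; mirrors WithSpan::with_span / with_context above.
    fn with_span(mut self, span: Span, description: impl Into<String>) -> Self {
        self.spans.push((span, description.into()));
        self
    }
}

// Blanket impl: every sized error type gets `.with_span(...)` for free,
// which is the effect of `impl<E> AddSpan for E` in the real crate.
trait AddSpan: Sized {
    fn with_span(self, span: Span, description: &str) -> WithSpan<Self>;
}

impl<E> AddSpan for E {
    fn with_span(self, span: Span, description: &str) -> WithSpan<Self> {
        WithSpan::new(self).with_span(span, description)
    }
}

#[derive(Debug)]
enum FunctionError {
    InvalidStoreTexture, // fields elided in this sketch
}

fn main() {
    // Two labels on one error, like the new textureStore diagnostics:
    // one span for the offending value, one for the statement that
    // expected a texture argument.
    let err = FunctionError::InvalidStoreTexture
        .with_span(Span { start: 10, end: 14 }, "this value is of type f32")
        .with_span(Span { start: 0, end: 30 }, "expects a texture argument");
    println!("{err:?}");
}
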
diff --git a/naga/tests/in/bounds-check-dynamic-buffer.wgsl b/naga/tests/in/bounds-check-dynamic-buffer.wgsl new file mode 100644 index 0000000000..fee9d9be3b --- /dev/null +++ b/naga/tests/in/bounds-check-dynamic-buffer.wgsl @@ -0,0 +1,30 @@ +@group(0) @binding(0) +var in: u32; +@group(0) @binding(1) +var out: array; + +struct T { + @size(16) + t: u32 +} + +@group(0) @binding(2) +var in_data_uniform: array; + +@group(0) @binding(3) +var in_data_storage_g0_b3: array; + +@group(0) @binding(4) +var in_data_storage_g0_b4: array; + +@group(1) @binding(0) +var in_data_storage_g1_b0: array; + +@compute @workgroup_size(1) +fn main() { + let i = in; + out[0] = in_data_uniform[i].t; + out[1] = in_data_storage_g0_b3[i].t; + out[2] = in_data_storage_g0_b4[i].t; + out[3] = in_data_storage_g1_b0[i].t; +} \ No newline at end of file diff --git a/naga/tests/in/must-use.wgsl b/naga/tests/in/must-use.wgsl new file mode 100644 index 0000000000..0251defd84 --- /dev/null +++ b/naga/tests/in/must-use.wgsl @@ -0,0 +1,23 @@ +@compute @workgroup_size(1) +fn main() {} + +@must_use +fn use_me() -> i32 { return 10; } + +fn use_return() -> i32 { + return use_me(); +} + +fn use_assign_var() -> i32 { + var q = use_me(); + return q; +} + +fn use_assign_let() -> i32 { + let q = use_me(); + return q; +} + +fn use_phony_assign() { + _ = use_me(); +} diff --git a/naga/tests/in/ray-query.wgsl b/naga/tests/in/ray-query.wgsl index 9f94356b83..0ed1606c05 100644 --- a/naga/tests/in/ray-query.wgsl +++ b/naga/tests/in/ray-query.wgsl @@ -28,8 +28,8 @@ struct RayDesc { struct RayIntersection { kind: u32, t: f32, - instance_custom_index: u32, - instance_id: u32, + instance_custom_data: u32, + instance_index: u32, sbt_record_offset: u32, geometry_index: u32, primitive_index: u32, diff --git a/naga/tests/in/skybox.param.ron b/naga/tests/in/skybox.param.ron index f95239202a..8e00ed943e 100644 --- a/naga/tests/in/skybox.param.ron +++ b/naga/tests/in/skybox.param.ron @@ -57,6 +57,9 @@ (group: 0, binding: 2): (space: 1, register: 0), }, fake_missing_bindings: false, + sampler_buffer_binding_map: { + (group: 0): (space: 2, register: 0), + }, special_constants_binding: Some((space: 0, register: 1)), zero_initialize_workgroup_memory: true, restrict_indexing: true diff --git a/naga/tests/in/storage-textures.wgsl b/naga/tests/in/storage-textures.wgsl new file mode 100644 index 0000000000..ad8d335b0c --- /dev/null +++ b/naga/tests/in/storage-textures.wgsl @@ -0,0 +1,17 @@ +@group(0) @binding(0) var s_r_r: texture_storage_2d; +@group(0) @binding(1) var s_rg_r: texture_storage_2d; +@group(0) @binding(2) var s_rgba_r: texture_storage_2d; +@compute @workgroup_size(1) fn csLoad() { + _ = textureLoad(s_r_r, vec2u(0)); + _ = textureLoad(s_rg_r, vec2u(0)); + _ = textureLoad(s_rgba_r, vec2u(0)); +} + +@group(1) @binding(0) var s_r_w: texture_storage_2d; +@group(1) @binding(1) var s_rg_w: texture_storage_2d; +@group(1) @binding(2) var s_rgba_w: texture_storage_2d; +@compute @workgroup_size(1) fn csStore() { + textureStore(s_r_w, vec2u(0), vec4f(0.0)); + textureStore(s_rg_w, vec2u(0), vec4f(0.0)); + textureStore(s_rgba_w, vec2u(0), vec4f(0.0)); +} \ No newline at end of file diff --git a/naga/tests/out/analysis/access.info.ron b/naga/tests/out/analysis/access.info.ron index 8948cb3a0a..c8a31bad2a 100644 --- a/naga/tests/out/analysis/access.info.ron +++ b/naga/tests/out/analysis/access.info.ron @@ -33,6 +33,11 @@ ("SIZED | COPY | CREATION_RESOLVED | ARGUMENT"), ("DATA | SIZED | COPY | HOST_SHAREABLE | CREATION_RESOLVED | ARGUMENT | CONSTRUCTIBLE"), ("SIZED 
| COPY | CREATION_RESOLVED | ARGUMENT"), + ("DATA | SIZED | COPY | CREATION_RESOLVED | ARGUMENT | CONSTRUCTIBLE"), + ("DATA | SIZED | COPY | CREATION_RESOLVED | ARGUMENT | CONSTRUCTIBLE"), + ("DATA | SIZED | COPY | IO_SHAREABLE | HOST_SHAREABLE | CREATION_RESOLVED | ARGUMENT | CONSTRUCTIBLE"), + ("DATA | SIZED | COPY | IO_SHAREABLE | HOST_SHAREABLE | CREATION_RESOLVED | ARGUMENT | CONSTRUCTIBLE"), + ("DATA | SIZED | COPY | IO_SHAREABLE | HOST_SHAREABLE | CREATION_RESOLVED | ARGUMENT | CONSTRUCTIBLE"), ], functions: [ ( @@ -2959,6 +2964,451 @@ dual_source_blending: false, diagnostic_filter_leaf: None, ), + ( + flags: ("EXPRESSIONS | BLOCKS | CONTROL_FLOW_UNIFORMITY | STRUCT_LAYOUTS | CONSTANTS | BINDINGS"), + available_stages: ("VERTEX | FRAGMENT | COMPUTE"), + uniformity: ( + non_uniform_result: Some(2), + requirements: (""), + ), + may_kill: false, + sampling_set: [], + global_uses: [ + (""), + (""), + (""), + (""), + (""), + ], + expressions: [ + ( + uniformity: ( + non_uniform_result: Some(0), + requirements: (""), + ), + ref_count: 1, + assignable_global: None, + ty: Handle(33), + ), + ( + uniformity: ( + non_uniform_result: Some(0), + requirements: (""), + ), + ref_count: 1, + assignable_global: None, + ty: Handle(34), + ), + ( + uniformity: ( + non_uniform_result: Some(2), + requirements: (""), + ), + ref_count: 2, + assignable_global: None, + ty: Value(Pointer( + base: 34, + space: Function, + )), + ), + ( + uniformity: ( + non_uniform_result: Some(2), + requirements: (""), + ), + ref_count: 1, + assignable_global: None, + ty: Value(Pointer( + base: 33, + space: Function, + )), + ), + ( + uniformity: ( + non_uniform_result: Some(2), + requirements: (""), + ), + ref_count: 1, + assignable_global: None, + ty: Handle(33), + ), + ], + sampling: [], + dual_source_blending: false, + diagnostic_filter_leaf: None, + ), + ( + flags: ("EXPRESSIONS | BLOCKS | CONTROL_FLOW_UNIFORMITY | STRUCT_LAYOUTS | CONSTANTS | BINDINGS"), + available_stages: ("VERTEX | FRAGMENT | COMPUTE"), + uniformity: ( + non_uniform_result: Some(2), + requirements: (""), + ), + may_kill: false, + sampling_set: [], + global_uses: [ + (""), + (""), + (""), + (""), + (""), + ], + expressions: [ + ( + uniformity: ( + non_uniform_result: None, + requirements: (""), + ), + ref_count: 1, + assignable_global: None, + ty: Value(Scalar(( + kind: Sint, + width: 4, + ))), + ), + ( + uniformity: ( + non_uniform_result: None, + requirements: (""), + ), + ref_count: 1, + assignable_global: None, + ty: Handle(35), + ), + ( + uniformity: ( + non_uniform_result: Some(2), + requirements: (""), + ), + ref_count: 1, + assignable_global: None, + ty: Value(Pointer( + base: 35, + space: Function, + )), + ), + ( + uniformity: ( + non_uniform_result: Some(2), + requirements: (""), + ), + ref_count: 1, + assignable_global: None, + ty: Value(Pointer( + base: 2, + space: Function, + )), + ), + ( + uniformity: ( + non_uniform_result: Some(2), + requirements: (""), + ), + ref_count: 1, + assignable_global: None, + ty: Handle(2), + ), + ], + sampling: [], + dual_source_blending: false, + diagnostic_filter_leaf: None, + ), + ( + flags: ("EXPRESSIONS | BLOCKS | CONTROL_FLOW_UNIFORMITY | STRUCT_LAYOUTS | CONSTANTS | BINDINGS"), + available_stages: ("VERTEX | FRAGMENT | COMPUTE"), + uniformity: ( + non_uniform_result: None, + requirements: (""), + ), + may_kill: false, + sampling_set: [], + global_uses: [ + (""), + (""), + (""), + (""), + (""), + ], + expressions: [ + ( + uniformity: ( + non_uniform_result: None, + requirements: (""), + ), + ref_count: 3, 
+ assignable_global: None, + ty: Handle(37), + ), + ( + uniformity: ( + non_uniform_result: None, + requirements: (""), + ), + ref_count: 1, + assignable_global: None, + ty: Handle(36), + ), + ( + uniformity: ( + non_uniform_result: None, + requirements: (""), + ), + ref_count: 1, + assignable_global: None, + ty: Handle(2), + ), + ( + uniformity: ( + non_uniform_result: None, + requirements: (""), + ), + ref_count: 1, + assignable_global: None, + ty: Handle(0), + ), + ( + uniformity: ( + non_uniform_result: None, + requirements: (""), + ), + ref_count: 1, + assignable_global: None, + ty: Value(Scalar(( + kind: Uint, + width: 4, + ))), + ), + ( + uniformity: ( + non_uniform_result: None, + requirements: (""), + ), + ref_count: 1, + assignable_global: None, + ty: Value(Scalar(( + kind: Bool, + width: 1, + ))), + ), + ( + uniformity: ( + non_uniform_result: None, + requirements: (""), + ), + ref_count: 1, + assignable_global: None, + ty: Handle(36), + ), + ( + uniformity: ( + non_uniform_result: None, + requirements: (""), + ), + ref_count: 1, + assignable_global: None, + ty: Handle(2), + ), + ], + sampling: [], + dual_source_blending: false, + diagnostic_filter_leaf: None, + ), + ( + flags: ("EXPRESSIONS | BLOCKS | CONTROL_FLOW_UNIFORMITY | STRUCT_LAYOUTS | CONSTANTS | BINDINGS"), + available_stages: ("VERTEX | FRAGMENT | COMPUTE"), + uniformity: ( + non_uniform_result: Some(1), + requirements: (""), + ), + may_kill: false, + sampling_set: [], + global_uses: [ + (""), + (""), + (""), + (""), + (""), + ], + expressions: [ + ( + uniformity: ( + non_uniform_result: None, + requirements: (""), + ), + ref_count: 1, + assignable_global: None, + ty: Handle(37), + ), + ( + uniformity: ( + non_uniform_result: Some(1), + requirements: (""), + ), + ref_count: 3, + assignable_global: None, + ty: Value(Pointer( + base: 37, + space: Function, + )), + ), + ( + uniformity: ( + non_uniform_result: Some(1), + requirements: (""), + ), + ref_count: 1, + assignable_global: None, + ty: Value(Pointer( + base: 36, + space: Function, + )), + ), + ( + uniformity: ( + non_uniform_result: Some(1), + requirements: (""), + ), + ref_count: 1, + assignable_global: None, + ty: Handle(36), + ), + ( + uniformity: ( + non_uniform_result: Some(4), + requirements: (""), + ), + ref_count: 2, + assignable_global: None, + ty: Value(Pointer( + base: 36, + space: Function, + )), + ), + ( + uniformity: ( + non_uniform_result: Some(4), + requirements: (""), + ), + ref_count: 1, + assignable_global: None, + ty: Value(Pointer( + base: 2, + space: Function, + )), + ), + ( + uniformity: ( + non_uniform_result: Some(4), + requirements: (""), + ), + ref_count: 1, + assignable_global: None, + ty: Handle(2), + ), + ( + uniformity: ( + non_uniform_result: Some(7), + requirements: (""), + ), + ref_count: 2, + assignable_global: None, + ty: Value(Pointer( + base: 2, + space: Function, + )), + ), + ( + uniformity: ( + non_uniform_result: Some(1), + requirements: (""), + ), + ref_count: 1, + assignable_global: None, + ty: Value(Pointer( + base: 0, + space: Function, + )), + ), + ( + uniformity: ( + non_uniform_result: Some(1), + requirements: (""), + ), + ref_count: 1, + assignable_global: None, + ty: Handle(0), + ), + ( + uniformity: ( + non_uniform_result: Some(7), + requirements: (""), + ), + ref_count: 1, + assignable_global: None, + ty: Handle(2), + ), + ( + uniformity: ( + non_uniform_result: Some(7), + requirements: (""), + ), + ref_count: 1, + assignable_global: None, + ty: Value(Scalar(( + kind: Uint, + width: 4, + ))), + ), + ( + 
uniformity: ( + non_uniform_result: Some(1), + requirements: (""), + ), + ref_count: 1, + assignable_global: None, + ty: Value(Scalar(( + kind: Bool, + width: 1, + ))), + ), + ( + uniformity: ( + non_uniform_result: Some(1), + requirements: (""), + ), + ref_count: 1, + assignable_global: None, + ty: Value(Pointer( + base: 36, + space: Function, + )), + ), + ( + uniformity: ( + non_uniform_result: Some(1), + requirements: (""), + ), + ref_count: 1, + assignable_global: None, + ty: Value(Pointer( + base: 2, + space: Function, + )), + ), + ( + uniformity: ( + non_uniform_result: Some(1), + requirements: (""), + ), + ref_count: 1, + assignable_global: None, + ty: Handle(2), + ), + ], + sampling: [], + dual_source_blending: false, + diagnostic_filter_leaf: None, + ), ], entry_points: [ ( diff --git a/naga/tests/out/analysis/storage-textures.info.ron b/naga/tests/out/analysis/storage-textures.info.ron new file mode 100644 index 0000000000..fbbf7206c3 --- /dev/null +++ b/naga/tests/out/analysis/storage-textures.info.ron @@ -0,0 +1,402 @@ +( + type_flags: [ + ("CREATION_RESOLVED | ARGUMENT"), + ("CREATION_RESOLVED | ARGUMENT"), + ("CREATION_RESOLVED | ARGUMENT"), + ("CREATION_RESOLVED | ARGUMENT"), + ("CREATION_RESOLVED | ARGUMENT"), + ("CREATION_RESOLVED | ARGUMENT"), + ], + functions: [], + entry_points: [ + ( + flags: ("EXPRESSIONS | BLOCKS | CONTROL_FLOW_UNIFORMITY | STRUCT_LAYOUTS | CONSTANTS | BINDINGS"), + available_stages: ("VERTEX | FRAGMENT | COMPUTE"), + uniformity: ( + non_uniform_result: None, + requirements: (""), + ), + may_kill: false, + sampling_set: [], + global_uses: [ + ("READ"), + ("READ"), + ("READ"), + (""), + (""), + (""), + ], + expressions: [ + ( + uniformity: ( + non_uniform_result: Some(0), + requirements: (""), + ), + ref_count: 1, + assignable_global: Some(0), + ty: Handle(0), + ), + ( + uniformity: ( + non_uniform_result: None, + requirements: (""), + ), + ref_count: 1, + assignable_global: None, + ty: Value(Scalar(( + kind: Uint, + width: 4, + ))), + ), + ( + uniformity: ( + non_uniform_result: None, + requirements: (""), + ), + ref_count: 1, + assignable_global: None, + ty: Value(Vector( + size: Bi, + scalar: ( + kind: Uint, + width: 4, + ), + )), + ), + ( + uniformity: ( + non_uniform_result: Some(0), + requirements: (""), + ), + ref_count: 0, + assignable_global: None, + ty: Value(Vector( + size: Quad, + scalar: ( + kind: Float, + width: 4, + ), + )), + ), + ( + uniformity: ( + non_uniform_result: Some(4), + requirements: (""), + ), + ref_count: 1, + assignable_global: Some(1), + ty: Handle(1), + ), + ( + uniformity: ( + non_uniform_result: None, + requirements: (""), + ), + ref_count: 1, + assignable_global: None, + ty: Value(Scalar(( + kind: Uint, + width: 4, + ))), + ), + ( + uniformity: ( + non_uniform_result: None, + requirements: (""), + ), + ref_count: 1, + assignable_global: None, + ty: Value(Vector( + size: Bi, + scalar: ( + kind: Uint, + width: 4, + ), + )), + ), + ( + uniformity: ( + non_uniform_result: Some(4), + requirements: (""), + ), + ref_count: 0, + assignable_global: None, + ty: Value(Vector( + size: Quad, + scalar: ( + kind: Float, + width: 4, + ), + )), + ), + ( + uniformity: ( + non_uniform_result: Some(8), + requirements: (""), + ), + ref_count: 1, + assignable_global: Some(2), + ty: Handle(2), + ), + ( + uniformity: ( + non_uniform_result: None, + requirements: (""), + ), + ref_count: 1, + assignable_global: None, + ty: Value(Scalar(( + kind: Uint, + width: 4, + ))), + ), + ( + uniformity: ( + non_uniform_result: None, + requirements: 
(""), + ), + ref_count: 1, + assignable_global: None, + ty: Value(Vector( + size: Bi, + scalar: ( + kind: Uint, + width: 4, + ), + )), + ), + ( + uniformity: ( + non_uniform_result: Some(8), + requirements: (""), + ), + ref_count: 0, + assignable_global: None, + ty: Value(Vector( + size: Quad, + scalar: ( + kind: Float, + width: 4, + ), + )), + ), + ], + sampling: [], + dual_source_blending: false, + diagnostic_filter_leaf: None, + ), + ( + flags: ("EXPRESSIONS | BLOCKS | CONTROL_FLOW_UNIFORMITY | STRUCT_LAYOUTS | CONSTANTS | BINDINGS"), + available_stages: ("VERTEX | FRAGMENT | COMPUTE"), + uniformity: ( + non_uniform_result: None, + requirements: (""), + ), + may_kill: false, + sampling_set: [], + global_uses: [ + (""), + (""), + (""), + ("WRITE"), + ("WRITE"), + ("WRITE"), + ], + expressions: [ + ( + uniformity: ( + non_uniform_result: Some(0), + requirements: (""), + ), + ref_count: 1, + assignable_global: Some(3), + ty: Handle(3), + ), + ( + uniformity: ( + non_uniform_result: None, + requirements: (""), + ), + ref_count: 1, + assignable_global: None, + ty: Value(Scalar(( + kind: Uint, + width: 4, + ))), + ), + ( + uniformity: ( + non_uniform_result: None, + requirements: (""), + ), + ref_count: 1, + assignable_global: None, + ty: Value(Vector( + size: Bi, + scalar: ( + kind: Uint, + width: 4, + ), + )), + ), + ( + uniformity: ( + non_uniform_result: None, + requirements: (""), + ), + ref_count: 1, + assignable_global: None, + ty: Value(Scalar(( + kind: Float, + width: 4, + ))), + ), + ( + uniformity: ( + non_uniform_result: None, + requirements: (""), + ), + ref_count: 1, + assignable_global: None, + ty: Value(Vector( + size: Quad, + scalar: ( + kind: Float, + width: 4, + ), + )), + ), + ( + uniformity: ( + non_uniform_result: Some(5), + requirements: (""), + ), + ref_count: 1, + assignable_global: Some(4), + ty: Handle(4), + ), + ( + uniformity: ( + non_uniform_result: None, + requirements: (""), + ), + ref_count: 1, + assignable_global: None, + ty: Value(Scalar(( + kind: Uint, + width: 4, + ))), + ), + ( + uniformity: ( + non_uniform_result: None, + requirements: (""), + ), + ref_count: 1, + assignable_global: None, + ty: Value(Vector( + size: Bi, + scalar: ( + kind: Uint, + width: 4, + ), + )), + ), + ( + uniformity: ( + non_uniform_result: None, + requirements: (""), + ), + ref_count: 1, + assignable_global: None, + ty: Value(Scalar(( + kind: Float, + width: 4, + ))), + ), + ( + uniformity: ( + non_uniform_result: None, + requirements: (""), + ), + ref_count: 1, + assignable_global: None, + ty: Value(Vector( + size: Quad, + scalar: ( + kind: Float, + width: 4, + ), + )), + ), + ( + uniformity: ( + non_uniform_result: Some(10), + requirements: (""), + ), + ref_count: 1, + assignable_global: Some(5), + ty: Handle(5), + ), + ( + uniformity: ( + non_uniform_result: None, + requirements: (""), + ), + ref_count: 1, + assignable_global: None, + ty: Value(Scalar(( + kind: Uint, + width: 4, + ))), + ), + ( + uniformity: ( + non_uniform_result: None, + requirements: (""), + ), + ref_count: 1, + assignable_global: None, + ty: Value(Vector( + size: Bi, + scalar: ( + kind: Uint, + width: 4, + ), + )), + ), + ( + uniformity: ( + non_uniform_result: None, + requirements: (""), + ), + ref_count: 1, + assignable_global: None, + ty: Value(Scalar(( + kind: Float, + width: 4, + ))), + ), + ( + uniformity: ( + non_uniform_result: None, + requirements: (""), + ), + ref_count: 1, + assignable_global: None, + ty: Value(Vector( + size: Quad, + scalar: ( + kind: Float, + width: 4, + ), + )), + ), + ], + 
sampling: [], + dual_source_blending: false, + diagnostic_filter_leaf: None, + ), + ], + const_expression_types: [], +) \ No newline at end of file diff --git a/naga/tests/out/glsl/6772-unpack-expr-accesses.main.Compute.glsl b/naga/tests/out/glsl/6772-unpack-expr-accesses.main.Compute.glsl index e857de73fe..a91356afaf 100644 --- a/naga/tests/out/glsl/6772-unpack-expr-accesses.main.Compute.glsl +++ b/naga/tests/out/glsl/6772-unpack-expr-accesses.main.Compute.glsl @@ -9,5 +9,6 @@ layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; void main() { int phony = ivec4(bitfieldExtract(int(12u), 0, 8), bitfieldExtract(int(12u), 8, 8), bitfieldExtract(int(12u), 16, 8), bitfieldExtract(int(12u), 24, 8))[2]; uint phony_1 = uvec4(bitfieldExtract(12u, 0, 8), bitfieldExtract(12u, 8, 8), bitfieldExtract(12u, 16, 8), bitfieldExtract(12u, 24, 8)).y; + return; } diff --git a/naga/tests/out/glsl/abstract-types-return.main.Compute.glsl b/naga/tests/out/glsl/abstract-types-return.main.Compute.glsl new file mode 100644 index 0000000000..07c75d7c51 --- /dev/null +++ b/naga/tests/out/glsl/abstract-types-return.main.Compute.glsl @@ -0,0 +1,36 @@ +#version 310 es + +precision highp float; +precision highp int; + +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + + +int return_i32_ai() { + return 1; +} + +uint return_u32_ai() { + return 1u; +} + +float return_f32_ai() { + return 1.0; +} + +float return_f32_af() { + return 1.0; +} + +vec2 return_vec2f32_ai() { + return vec2(1.0); +} + +float[4] return_arrf32_ai() { + return float[4](1.0, 1.0, 1.0, 1.0); +} + +void main() { + return; +} + diff --git a/naga/tests/out/glsl/access.assign_through_ptr.Compute.glsl b/naga/tests/out/glsl/access.assign_through_ptr.Compute.glsl index d8b4f8fc16..004720bfc3 100644 --- a/naga/tests/out/glsl/access.assign_through_ptr.Compute.glsl +++ b/naga/tests/out/glsl/access.assign_through_ptr.Compute.glsl @@ -22,6 +22,16 @@ struct MatCx2InArray { struct AssignToMember { uint x; }; +struct S { + int m; +}; +struct Inner { + int delicious; +}; +struct Outer { + Inner om_nom_nom; + uint thing; +}; float read_from_private(inout float foo_1) { float _e1 = foo_1; @@ -62,6 +72,43 @@ void assign_to_arg_ptr_array_element(inout uint p_4[4]) { return; } +bool index_ptr(bool value) { + bool a_1[1] = bool[1](false); + a_1 = bool[1](value); + bool _e4 = a_1[0]; + return _e4; +} + +int member_ptr() { + S s = S(42); + int _e4 = s.m; + return _e4; +} + +int let_members_of_members() { + Inner inner_1 = Outer(Inner(0), 0u).om_nom_nom; + int delishus_1 = inner_1.delicious; + if ((Outer(Inner(0), 0u).thing != uint(delishus_1))) { + } + return Outer(Inner(0), 0u).om_nom_nom.delicious; +} + +int var_members_of_members() { + Outer thing = Outer(Inner(0), 0u); + Inner inner = Inner(0); + int delishus = 0; + Inner _e3 = thing.om_nom_nom; + inner = _e3; + int _e6 = inner.delicious; + delishus = _e6; + uint _e9 = thing.thing; + int _e10 = delishus; + if ((_e9 != uint(_e10))) { + } + int _e15 = thing.om_nom_nom.delicious; + return _e15; +} + void main() { uint val = 33u; vec4 arr[2] = vec4[2](vec4(6.0), vec4(7.0)); diff --git a/naga/tests/out/glsl/access.assign_to_ptr_components.Compute.glsl b/naga/tests/out/glsl/access.assign_to_ptr_components.Compute.glsl index ec83c8a2ee..b0224010aa 100644 --- a/naga/tests/out/glsl/access.assign_to_ptr_components.Compute.glsl +++ b/naga/tests/out/glsl/access.assign_to_ptr_components.Compute.glsl @@ -22,6 +22,16 @@ struct MatCx2InArray { struct AssignToMember { uint x; }; +struct S { + int m; +}; +struct 
Inner { + int delicious; +}; +struct Outer { + Inner om_nom_nom; + uint thing; +}; float read_from_private(inout float foo_1) { float _e1 = foo_1; @@ -62,6 +72,43 @@ void assign_to_arg_ptr_array_element(inout uint p_4[4]) { return; } +bool index_ptr(bool value) { + bool a_1[1] = bool[1](false); + a_1 = bool[1](value); + bool _e4 = a_1[0]; + return _e4; +} + +int member_ptr() { + S s = S(42); + int _e4 = s.m; + return _e4; +} + +int let_members_of_members() { + Inner inner_1 = Outer(Inner(0), 0u).om_nom_nom; + int delishus_1 = inner_1.delicious; + if ((Outer(Inner(0), 0u).thing != uint(delishus_1))) { + } + return Outer(Inner(0), 0u).om_nom_nom.delicious; +} + +int var_members_of_members() { + Outer thing = Outer(Inner(0), 0u); + Inner inner = Inner(0); + int delishus = 0; + Inner _e3 = thing.om_nom_nom; + inner = _e3; + int _e6 = inner.delicious; + delishus = _e6; + uint _e9 = thing.thing; + int _e10 = delishus; + if ((_e9 != uint(_e10))) { + } + int _e15 = thing.om_nom_nom.delicious; + return _e15; +} + void main() { AssignToMember s1_ = AssignToMember(0u); uint a1_[4] = uint[4](0u, 0u, 0u, 0u); diff --git a/naga/tests/out/glsl/access.foo_frag.Fragment.glsl b/naga/tests/out/glsl/access.foo_frag.Fragment.glsl index 90092e535e..308cf195c1 100644 --- a/naga/tests/out/glsl/access.foo_frag.Fragment.glsl +++ b/naga/tests/out/glsl/access.foo_frag.Fragment.glsl @@ -20,6 +20,16 @@ struct MatCx2InArray { struct AssignToMember { uint x; }; +struct S { + int m; +}; +struct Inner { + int delicious; +}; +struct Outer { + Inner om_nom_nom; + uint thing; +}; layout(std430) buffer Bar_block_0Fragment { mat4x3 _matrix; mat2x2 matrix_array[2]; @@ -72,6 +82,43 @@ void assign_to_arg_ptr_array_element(inout uint p_4[4]) { return; } +bool index_ptr(bool value) { + bool a_1[1] = bool[1](false); + a_1 = bool[1](value); + bool _e4 = a_1[0]; + return _e4; +} + +int member_ptr() { + S s = S(42); + int _e4 = s.m; + return _e4; +} + +int let_members_of_members() { + Inner inner_1 = Outer(Inner(0), 0u).om_nom_nom; + int delishus_1 = inner_1.delicious; + if ((Outer(Inner(0), 0u).thing != uint(delishus_1))) { + } + return Outer(Inner(0), 0u).om_nom_nom.delicious; +} + +int var_members_of_members() { + Outer thing = Outer(Inner(0), 0u); + Inner inner = Inner(0); + int delishus = 0; + Inner _e3 = thing.om_nom_nom; + inner = _e3; + int _e6 = inner.delicious; + delishus = _e6; + uint _e9 = thing.thing; + int _e10 = delishus; + if ((_e9 != uint(_e10))) { + } + int _e15 = thing.om_nom_nom.delicious; + return _e15; +} + void main() { _group_0_binding_0_fs._matrix[1][2] = 1.0; _group_0_binding_0_fs._matrix = mat4x3(vec3(0.0), vec3(1.0), vec3(2.0), vec3(3.0)); diff --git a/naga/tests/out/glsl/access.foo_vert.Vertex.glsl b/naga/tests/out/glsl/access.foo_vert.Vertex.glsl index 8e09a39c6a..c710043d23 100644 --- a/naga/tests/out/glsl/access.foo_vert.Vertex.glsl +++ b/naga/tests/out/glsl/access.foo_vert.Vertex.glsl @@ -20,6 +20,16 @@ struct MatCx2InArray { struct AssignToMember { uint x; }; +struct S { + int m; +}; +struct Inner { + int delicious; +}; +struct Outer { + Inner om_nom_nom; + uint thing; +}; layout(std430) buffer Bar_block_0Vertex { mat4x3 _matrix; mat2x2 matrix_array[2]; @@ -145,6 +155,43 @@ void assign_to_arg_ptr_array_element(inout uint p_4[4]) { return; } +bool index_ptr(bool value) { + bool a_1[1] = bool[1](false); + a_1 = bool[1](value); + bool _e4 = a_1[0]; + return _e4; +} + +int member_ptr() { + S s = S(42); + int _e4 = s.m; + return _e4; +} + +int let_members_of_members() { + Inner inner_1 = Outer(Inner(0), 
0u).om_nom_nom; + int delishus_1 = inner_1.delicious; + if ((Outer(Inner(0), 0u).thing != uint(delishus_1))) { + } + return Outer(Inner(0), 0u).om_nom_nom.delicious; +} + +int var_members_of_members() { + Outer thing = Outer(Inner(0), 0u); + Inner inner = Inner(0); + int delishus = 0; + Inner _e3 = thing.om_nom_nom; + inner = _e3; + int _e6 = inner.delicious; + delishus = _e6; + uint _e9 = thing.thing; + int _e10 = delishus; + if ((_e9 != uint(_e10))) { + } + int _e15 = thing.om_nom_nom.delicious; + return _e15; +} + void main() { uint vi = uint(gl_VertexID); float foo = 0.0; @@ -156,14 +203,14 @@ void main() { mat4x3 _matrix = _group_0_binding_0_vs._matrix; uvec2 arr_1[2] = _group_0_binding_0_vs.arr; float b = _group_0_binding_0_vs._matrix[3u][0]; - int a_1 = _group_0_binding_0_vs.data[(uint(_group_0_binding_0_vs.data.length()) - 2u)].value; + int a_2 = _group_0_binding_0_vs.data[(uint(_group_0_binding_0_vs.data.length()) - 2u)].value; ivec2 c = _group_0_binding_2_vs; float _e33 = read_from_private(foo); - c2_ = int[5](a_1, int(b), 3, 4, 5); + c2_ = int[5](a_2, int(b), 3, 4, 5); c2_[(vi + 1u)] = 42; - int value = c2_[vi]; + int value_1 = c2_[vi]; float _e47 = test_arr_as_arg(float[5][10](float[10](0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0), float[10](0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0), float[10](0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0), float[10](0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0), float[10](0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0))); - gl_Position = vec4((_matrix * vec4(ivec4(value))), 2.0); + gl_Position = vec4((_matrix * vec4(ivec4(value_1))), 2.0); gl_Position.yz = vec2(-gl_Position.y, gl_Position.z * 2.0 - gl_Position.w); return; } diff --git a/naga/tests/out/glsl/array-in-ctor.cs_main.Compute.glsl b/naga/tests/out/glsl/array-in-ctor.cs_main.Compute.glsl index bd918087b8..71d64585ad 100644 --- a/naga/tests/out/glsl/array-in-ctor.cs_main.Compute.glsl +++ b/naga/tests/out/glsl/array-in-ctor.cs_main.Compute.glsl @@ -13,5 +13,6 @@ layout(std430) readonly buffer Ah_block_0Compute { Ah _group_0_binding_0_cs; }; void main() { Ah ah_1 = _group_0_binding_0_cs; + return; } diff --git a/naga/tests/out/glsl/array-in-function-return-type.main.Fragment.glsl b/naga/tests/out/glsl/array-in-function-return-type.main.Fragment.glsl index 45fc31a622..3b35f9e353 100644 --- a/naga/tests/out/glsl/array-in-function-return-type.main.Fragment.glsl +++ b/naga/tests/out/glsl/array-in-function-return-type.main.Fragment.glsl @@ -9,9 +9,16 @@ float[2] ret_array() { return float[2](1.0, 2.0); } -void main() { +float[3][2] ret_array_array() { float _e0[2] = ret_array(); - _fs2p_location0 = vec4(_e0[0], _e0[1], 0.0, 1.0); + float _e1[2] = ret_array(); + float _e2[2] = ret_array(); + return float[3][2](_e0, _e1, _e2); +} + +void main() { + float _e0[3][2] = ret_array_array(); + _fs2p_location0 = vec4(_e0[0][0], _e0[0][1], 0.0, 1.0); return; } diff --git a/naga/tests/out/glsl/const-exprs.main.Compute.glsl b/naga/tests/out/glsl/const-exprs.main.Compute.glsl index b095345de9..0b318a65e3 100644 --- a/naga/tests/out/glsl/const-exprs.main.Compute.glsl +++ b/naga/tests/out/glsl/const-exprs.main.Compute.glsl @@ -23,14 +23,17 @@ const bvec2 compare_vec = bvec2(true, false); void swizzle_of_compose() { ivec4 out_ = ivec4(4, 3, 2, 1); + return; } void index_of_compose() { int out_1 = 2; + return; } void compose_three_deep() { int out_2 = 6; + return; } void non_constant_initializers() { @@ -53,14 +56,17 @@ void non_constant_initializers() { void splat_of_constant() { ivec4 
out_4 = ivec4(-4, -4, -4, -4); + return; } void compose_of_constant() { ivec4 out_5 = ivec4(-4, -4, -4, -4); + return; } void compose_of_splat() { vec4 x_1 = vec4(2.0, 1.0, 1.0, 1.0); + return; } uint map_texture_kind(int texture_kind) { diff --git a/naga/tests/out/glsl/constructors.main.Compute.glsl b/naga/tests/out/glsl/constructors.main.Compute.glsl index c28401d0b4..cff178f200 100644 --- a/naga/tests/out/glsl/constructors.main.Compute.glsl +++ b/naga/tests/out/glsl/constructors.main.Compute.glsl @@ -33,5 +33,6 @@ void main() { int cit2_[4] = int[4](0, 1, 2, 3); uvec2 ic4_ = uvec2(0u, 0u); mat2x3 ic5_ = mat2x3(vec3(0.0, 0.0, 0.0), vec3(0.0, 0.0, 0.0)); + return; } diff --git a/naga/tests/out/glsl/cross.main.Compute.glsl b/naga/tests/out/glsl/cross.main.Compute.glsl index a4950274e3..c78b6ebeed 100644 --- a/naga/tests/out/glsl/cross.main.Compute.glsl +++ b/naga/tests/out/glsl/cross.main.Compute.glsl @@ -8,5 +8,6 @@ layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; void main() { vec3 a = cross(vec3(0.0, 1.0, 2.0), vec3(0.0, 1.0, 2.0)); + return; } diff --git a/naga/tests/out/glsl/globals.main.Compute.glsl b/naga/tests/out/glsl/globals.main.Compute.glsl index b7ef8bd295..bc20d6415f 100644 --- a/naga/tests/out/glsl/globals.main.Compute.glsl +++ b/naga/tests/out/glsl/globals.main.Compute.glsl @@ -49,6 +49,7 @@ void test_msl_packed_vec3_() { vec3 mvm1_ = (mat3x3(0.0) * data.v3_); vec3 svm0_ = (data.v3_ * 2.0); vec3 svm1_ = (2.0 * data.v3_); + return; } void main() { diff --git a/naga/tests/out/glsl/math-functions.main.Fragment.glsl b/naga/tests/out/glsl/math-functions.main.Fragment.glsl index 6ac3a4de0c..1877fb3d91 100644 --- a/naga/tests/out/glsl/math-functions.main.Fragment.glsl +++ b/naga/tests/out/glsl/math-functions.main.Fragment.glsl @@ -94,5 +94,6 @@ void main() { vec3 quantizeToF16_c = vec3(unpackHalf2x16(packHalf2x16(_e125.xy)), unpackHalf2x16(packHalf2x16(_e125.zz)).x); vec4 _e131 = vec4(1.0, 1.0, 1.0, 1.0); vec4 quantizeToF16_d = vec4(unpackHalf2x16(packHalf2x16(_e131.xy)), unpackHalf2x16(packHalf2x16(_e131.zw))); + return; } diff --git a/naga/tests/out/glsl/operators.main.Compute.glsl b/naga/tests/out/glsl/operators.main.Compute.glsl index 006bce205e..1c2824495f 100644 --- a/naga/tests/out/glsl/operators.main.Compute.glsl +++ b/naga/tests/out/glsl/operators.main.Compute.glsl @@ -55,6 +55,7 @@ void logical() { bvec3 bitwise_or1_ = bvec3(bvec3(true).x || bvec3(false).x, bvec3(true).y || bvec3(false).y, bvec3(true).z || bvec3(false).z); bool bitwise_and0_ = (true && false); bvec4 bitwise_and1_ = bvec4(bvec4(true).x && bvec4(false).x, bvec4(true).y && bvec4(false).y, bvec4(true).z && bvec4(false).z, bvec4(true).w && bvec4(false).w); + return; } void arithmetic() { @@ -130,6 +131,7 @@ void arithmetic() { vec3 mul_vector0_ = (mat4x3(0.0) * vec4(1.0)); vec4 mul_vector1_ = (vec3(2.0) * mat4x3(0.0)); mat3x3 mul = (mat4x3(0.0) * mat3x4(0.0)); + return; } void bit() { @@ -157,6 +159,7 @@ void bit() { uint shr1_ = (2u >> 1u); ivec2 shr2_ = (ivec2(2) >> uvec2(1u)); uvec3 shr3_ = (uvec3(2u) >> uvec3(1u)); + return; } void comparison() { @@ -196,6 +199,7 @@ void comparison() { bvec2 gte3_ = greaterThanEqual(ivec2(2), ivec2(1)); bvec3 gte4_ = greaterThanEqual(uvec3(2u), uvec3(1u)); bvec4 gte5_ = greaterThanEqual(vec4(2.0), vec4(1.0)); + return; } void assignment() { @@ -244,6 +248,7 @@ void negation_avoids_prefix_decrement() { int p5_ = -(-(-(-(1)))); int p6_ = -(-(-(-(-(1))))); int p7_ = -(-(-(-(-(1))))); + return; } void main() { diff --git 
a/naga/tests/out/glsl/phony_assignment.main.Compute.glsl b/naga/tests/out/glsl/phony_assignment.main.Compute.glsl index 6118e9b4a8..1bf2a8f394 100644 --- a/naga/tests/out/glsl/phony_assignment.main.Compute.glsl +++ b/naga/tests/out/glsl/phony_assignment.main.Compute.glsl @@ -19,5 +19,6 @@ void main() { int _e6 = five(); int _e7 = five(); float phony_2 = _group_0_binding_0_cs; + return; } diff --git a/naga/tests/out/glsl/separate-entry-points.fragment.Fragment.glsl b/naga/tests/out/glsl/separate-entry-points.fragment.Fragment.glsl index 9ea32684cd..52a29a3fc2 100644 --- a/naga/tests/out/glsl/separate-entry-points.fragment.Fragment.glsl +++ b/naga/tests/out/glsl/separate-entry-points.fragment.Fragment.glsl @@ -9,6 +9,7 @@ void derivatives() { float x = dFdx(0.0); float y = dFdy(0.0); float width = fwidth(0.0); + return; } void main() { diff --git a/naga/tests/out/hlsl/6772-unpack-expr-accesses.hlsl b/naga/tests/out/hlsl/6772-unpack-expr-accesses.hlsl index e50f40c8fc..c6d6c3f150 100644 --- a/naga/tests/out/hlsl/6772-unpack-expr-accesses.hlsl +++ b/naga/tests/out/hlsl/6772-unpack-expr-accesses.hlsl @@ -3,4 +3,5 @@ void main() { int phony = (int4(12u, 12u >> 8, 12u >> 16, 12u >> 24) << 24 >> 24)[2]; uint phony_1 = (uint4(12u, 12u >> 8, 12u >> 16, 12u >> 24) << 24 >> 24).y; + return; } diff --git a/naga/tests/out/hlsl/abstract-types-return.hlsl b/naga/tests/out/hlsl/abstract-types-return.hlsl new file mode 100644 index 0000000000..fe2de29aee --- /dev/null +++ b/naga/tests/out/hlsl/abstract-types-return.hlsl @@ -0,0 +1,42 @@ +int return_i32_ai() +{ + return 1; +} + +uint return_u32_ai() +{ + return 1u; +} + +float return_f32_ai() +{ + return 1.0; +} + +float return_f32_af() +{ + return 1.0; +} + +float2 return_vec2f32_ai() +{ + return (1.0).xx; +} + +typedef float ret_Constructarray4_float_[4]; +ret_Constructarray4_float_ Constructarray4_float_(float arg0, float arg1, float arg2, float arg3) { + float ret[4] = { arg0, arg1, arg2, arg3 }; + return ret; +} + +typedef float ret_return_arrf32_ai[4]; +ret_return_arrf32_ai return_arrf32_ai() +{ + return Constructarray4_float_(1.0, 1.0, 1.0, 1.0); +} + +[numthreads(1, 1, 1)] +void main() +{ + return; +} diff --git a/naga/tests/out/hlsl/abstract-types-return.ron b/naga/tests/out/hlsl/abstract-types-return.ron new file mode 100644 index 0000000000..a07b03300b --- /dev/null +++ b/naga/tests/out/hlsl/abstract-types-return.ron @@ -0,0 +1,12 @@ +( + vertex:[ + ], + fragment:[ + ], + compute:[ + ( + entry_point:"main", + target_profile:"cs_5_1", + ), + ], +) diff --git a/naga/tests/out/hlsl/access.hlsl b/naga/tests/out/hlsl/access.hlsl index a6fbf368b3..dcb62ae2af 100644 --- a/naga/tests/out/hlsl/access.hlsl +++ b/naga/tests/out/hlsl/access.hlsl @@ -72,6 +72,19 @@ struct AssignToMember { uint x; }; +struct S { + int m; +}; + +struct Inner { + int delicious; +}; + +struct Outer { + Inner om_nom_nom; + uint thing; +}; + GlobalConst ConstructGlobalConst(uint arg0, uint3 arg1, int arg2) { GlobalConst ret = (GlobalConst)0; ret.a = arg0; @@ -258,6 +271,66 @@ void assign_to_arg_ptr_array_element(inout uint p_4[4]) return; } +typedef bool ret_Constructarray1_bool_[1]; +ret_Constructarray1_bool_ Constructarray1_bool_(bool arg0) { + bool ret[1] = { arg0 }; + return ret; +} + +bool index_ptr(bool value) +{ + bool a_1[1] = (bool[1])0; + + a_1 = Constructarray1_bool_(value); + bool _e4 = a_1[0]; + return _e4; +} + +S ConstructS(int arg0) { + S ret = (S)0; + ret.m = arg0; + return ret; +} + +int member_ptr() +{ + S s = ConstructS(42); + + int _e4 = s.m; + return _e4; +} + 
+Outer ZeroValueOuter() { + return (Outer)0; +} + +int let_members_of_members() +{ + Inner inner_1 = ZeroValueOuter().om_nom_nom; + int delishus_1 = inner_1.delicious; + if ((ZeroValueOuter().thing != uint(delishus_1))) { + } + return ZeroValueOuter().om_nom_nom.delicious; +} + +int var_members_of_members() +{ + Outer thing = ZeroValueOuter(); + Inner inner = (Inner)0; + int delishus = (int)0; + + Inner _e3 = thing.om_nom_nom; + inner = _e3; + int _e6 = inner.delicious; + delishus = _e6; + uint _e9 = thing.thing; + int _e10 = delishus; + if ((_e9 != uint(_e10))) { + } + int _e15 = thing.om_nom_nom.delicious; + return _e15; +} + typedef int ret_Constructarray5_int_[5]; ret_Constructarray5_int_ Constructarray5_int_(int arg0, int arg1, int arg2, int arg3, int arg4) { int ret[5] = { arg0, arg1, arg2, arg3, arg4 }; @@ -294,14 +367,14 @@ float4 foo_vert(uint vi : SV_VertexID) : SV_Position float4x3 _matrix = float4x3(asfloat(bar.Load3(0+0)), asfloat(bar.Load3(0+16)), asfloat(bar.Load3(0+32)), asfloat(bar.Load3(0+48))); uint2 arr_1[2] = Constructarray2_uint2_(asuint(bar.Load2(144+0)), asuint(bar.Load2(144+8))); float b = asfloat(bar.Load(0+3u*16+0)); - int a_1 = asint(bar.Load(0+(((NagaBufferLengthRW(bar) - 160) / 8) - 2u)*8+160)); + int a_2 = asint(bar.Load(0+(((NagaBufferLengthRW(bar) - 160) / 8) - 2u)*8+160)); int2 c = asint(qux.Load2(0)); const float _e33 = read_from_private(foo); - c2_ = Constructarray5_int_(a_1, int(b), 3, 4, 5); + c2_ = Constructarray5_int_(a_2, int(b), 3, 4, 5); c2_[min(uint((vi + 1u)), 4u)] = 42; - int value = c2_[min(uint(vi), 4u)]; + int value_1 = c2_[min(uint(vi), 4u)]; const float _e47 = test_arr_as_arg(ZeroValuearray5_array10_float__()); - return float4(mul(float4((value).xxxx), _matrix), 2.0); + return float4(mul(float4((value_1).xxxx), _matrix), 2.0); } int2 ZeroValueint2() { diff --git a/naga/tests/out/hlsl/array-in-ctor.hlsl b/naga/tests/out/hlsl/array-in-ctor.hlsl index 1079262a01..d7054a1899 100644 --- a/naga/tests/out/hlsl/array-in-ctor.hlsl +++ b/naga/tests/out/hlsl/array-in-ctor.hlsl @@ -20,4 +20,5 @@ Ah ConstructAh(float arg0[2]) { void cs_main() { Ah ah_1 = ConstructAh(Constructarray2_float_(asfloat(ah.Load(0+0)), asfloat(ah.Load(0+4)))); + return; } diff --git a/naga/tests/out/hlsl/array-in-function-return-type.hlsl b/naga/tests/out/hlsl/array-in-function-return-type.hlsl new file mode 100644 index 0000000000..8d0eb7411c --- /dev/null +++ b/naga/tests/out/hlsl/array-in-function-return-type.hlsl @@ -0,0 +1,32 @@ +typedef float ret_Constructarray2_float_[2]; +ret_Constructarray2_float_ Constructarray2_float_(float arg0, float arg1) { + float ret[2] = { arg0, arg1 }; + return ret; +} + +typedef float ret_ret_array[2]; +ret_ret_array ret_array() +{ + return Constructarray2_float_(1.0, 2.0); +} + +typedef float ret_Constructarray3_array2_float__[3][2]; +ret_Constructarray3_array2_float__ Constructarray3_array2_float__(float arg0[2], float arg1[2], float arg2[2]) { + float ret[3][2] = { arg0, arg1, arg2 }; + return ret; +} + +typedef float ret_ret_array_array[3][2]; +ret_ret_array_array ret_array_array() +{ + const float _e0[2] = ret_array(); + const float _e1[2] = ret_array(); + const float _e2[2] = ret_array(); + return Constructarray3_array2_float__(_e0, _e1, _e2); +} + +float4 main() : SV_Target0 +{ + const float _e0[3][2] = ret_array_array(); + return float4(_e0[0][0], _e0[0][1], 0.0, 1.0); +} diff --git a/naga/tests/out/hlsl/array-in-function-return-type.ron b/naga/tests/out/hlsl/array-in-function-return-type.ron new file mode 100644 index 
0000000000..341a4c528e --- /dev/null +++ b/naga/tests/out/hlsl/array-in-function-return-type.ron @@ -0,0 +1,12 @@ +( + vertex:[ + ], + fragment:[ + ( + entry_point:"main", + target_profile:"ps_5_1", + ), + ], + compute:[ + ], +) diff --git a/naga/tests/out/hlsl/binding-arrays.hlsl b/naga/tests/out/hlsl/binding-arrays.hlsl index d6719c1fa6..3f0a533b6c 100644 --- a/naga/tests/out/hlsl/binding-arrays.hlsl +++ b/naga/tests/out/hlsl/binding-arrays.hlsl @@ -12,8 +12,11 @@ Texture2DArray texture_array_2darray[5] : register(t0, space2); Texture2DMS texture_array_multisampled[5] : register(t0, space3); Texture2D texture_array_depth[5] : register(t0, space4); RWTexture2D texture_array_storage[5] : register(u0, space5); -SamplerState samp[5] : register(s0, space6); -SamplerComparisonState samp_comp[5] : register(s0, space7); +SamplerState nagaSamplerHeap[2048]: register(s0, space0); +SamplerComparisonState nagaComparisonSamplerHeap[2048]: register(s0, space1); +StructuredBuffer nagaGroup0SamplerIndexArray : register(t0, space255); +static const uint samp = 0; +static const uint samp_comp = 0; cbuffer uni : register(b0, space8) { UniformIndex uni; } struct FragmentInput_main { @@ -66,22 +69,22 @@ float4 main(FragmentInput_main fragmentinput_main) : SV_Target0 u2_ = (_e27 + NagaDimensions2D(texture_array_unbounded[uniform_index])); uint2 _e32 = u2_; u2_ = (_e32 + NagaDimensions2D(texture_array_unbounded[NonUniformResourceIndex(non_uniform_index)])); - float4 _e38 = texture_array_bounded[0].Gather(samp[0], uv); + float4 _e38 = texture_array_bounded[0].Gather(nagaSamplerHeap[nagaGroup0SamplerIndexArray[samp + 0]], uv); float4 _e39 = v4_; v4_ = (_e39 + _e38); - float4 _e45 = texture_array_bounded[uniform_index].Gather(samp[uniform_index], uv); + float4 _e45 = texture_array_bounded[uniform_index].Gather(nagaSamplerHeap[nagaGroup0SamplerIndexArray[samp + uniform_index]], uv); float4 _e46 = v4_; v4_ = (_e46 + _e45); - float4 _e52 = texture_array_bounded[NonUniformResourceIndex(non_uniform_index)].Gather(samp[NonUniformResourceIndex(non_uniform_index)], uv); + float4 _e52 = texture_array_bounded[NonUniformResourceIndex(non_uniform_index)].Gather(nagaSamplerHeap[NonUniformResourceIndex(nagaGroup0SamplerIndexArray[samp + non_uniform_index])], uv); float4 _e53 = v4_; v4_ = (_e53 + _e52); - float4 _e60 = texture_array_depth[0].GatherCmp(samp_comp[0], uv, 0.0); + float4 _e60 = texture_array_depth[0].GatherCmp(nagaComparisonSamplerHeap[nagaGroup0SamplerIndexArray[samp_comp + 0]], uv, 0.0); float4 _e61 = v4_; v4_ = (_e61 + _e60); - float4 _e68 = texture_array_depth[uniform_index].GatherCmp(samp_comp[uniform_index], uv, 0.0); + float4 _e68 = texture_array_depth[uniform_index].GatherCmp(nagaComparisonSamplerHeap[nagaGroup0SamplerIndexArray[samp_comp + uniform_index]], uv, 0.0); float4 _e69 = v4_; v4_ = (_e69 + _e68); - float4 _e76 = texture_array_depth[NonUniformResourceIndex(non_uniform_index)].GatherCmp(samp_comp[NonUniformResourceIndex(non_uniform_index)], uv, 0.0); + float4 _e76 = texture_array_depth[NonUniformResourceIndex(non_uniform_index)].GatherCmp(nagaComparisonSamplerHeap[NonUniformResourceIndex(nagaGroup0SamplerIndexArray[samp_comp + non_uniform_index])], uv, 0.0); float4 _e77 = v4_; v4_ = (_e77 + _e76); float4 _e82 = texture_array_unbounded[0].Load(int3(pix, 0)); @@ -111,58 +114,58 @@ float4 main(FragmentInput_main fragmentinput_main) : SV_Target0 u1_ = (_e135 + NagaMSNumSamples2D(texture_array_multisampled[uniform_index])); uint _e140 = u1_; u1_ = (_e140 + 
NagaMSNumSamples2D(texture_array_multisampled[NonUniformResourceIndex(non_uniform_index)])); - float4 _e146 = texture_array_bounded[0].Sample(samp[0], uv); + float4 _e146 = texture_array_bounded[0].Sample(nagaSamplerHeap[nagaGroup0SamplerIndexArray[samp + 0]], uv); float4 _e147 = v4_; v4_ = (_e147 + _e146); - float4 _e153 = texture_array_bounded[uniform_index].Sample(samp[uniform_index], uv); + float4 _e153 = texture_array_bounded[uniform_index].Sample(nagaSamplerHeap[nagaGroup0SamplerIndexArray[samp + uniform_index]], uv); float4 _e154 = v4_; v4_ = (_e154 + _e153); - float4 _e160 = texture_array_bounded[NonUniformResourceIndex(non_uniform_index)].Sample(samp[NonUniformResourceIndex(non_uniform_index)], uv); + float4 _e160 = texture_array_bounded[NonUniformResourceIndex(non_uniform_index)].Sample(nagaSamplerHeap[NonUniformResourceIndex(nagaGroup0SamplerIndexArray[samp + non_uniform_index])], uv); float4 _e161 = v4_; v4_ = (_e161 + _e160); - float4 _e168 = texture_array_bounded[0].SampleBias(samp[0], uv, 0.0); + float4 _e168 = texture_array_bounded[0].SampleBias(nagaSamplerHeap[nagaGroup0SamplerIndexArray[samp + 0]], uv, 0.0); float4 _e169 = v4_; v4_ = (_e169 + _e168); - float4 _e176 = texture_array_bounded[uniform_index].SampleBias(samp[uniform_index], uv, 0.0); + float4 _e176 = texture_array_bounded[uniform_index].SampleBias(nagaSamplerHeap[nagaGroup0SamplerIndexArray[samp + uniform_index]], uv, 0.0); float4 _e177 = v4_; v4_ = (_e177 + _e176); - float4 _e184 = texture_array_bounded[NonUniformResourceIndex(non_uniform_index)].SampleBias(samp[NonUniformResourceIndex(non_uniform_index)], uv, 0.0); + float4 _e184 = texture_array_bounded[NonUniformResourceIndex(non_uniform_index)].SampleBias(nagaSamplerHeap[NonUniformResourceIndex(nagaGroup0SamplerIndexArray[samp + non_uniform_index])], uv, 0.0); float4 _e185 = v4_; v4_ = (_e185 + _e184); - float _e192 = texture_array_depth[0].SampleCmp(samp_comp[0], uv, 0.0); + float _e192 = texture_array_depth[0].SampleCmp(nagaComparisonSamplerHeap[nagaGroup0SamplerIndexArray[samp_comp + 0]], uv, 0.0); float _e193 = v1_; v1_ = (_e193 + _e192); - float _e200 = texture_array_depth[uniform_index].SampleCmp(samp_comp[uniform_index], uv, 0.0); + float _e200 = texture_array_depth[uniform_index].SampleCmp(nagaComparisonSamplerHeap[nagaGroup0SamplerIndexArray[samp_comp + uniform_index]], uv, 0.0); float _e201 = v1_; v1_ = (_e201 + _e200); - float _e208 = texture_array_depth[NonUniformResourceIndex(non_uniform_index)].SampleCmp(samp_comp[NonUniformResourceIndex(non_uniform_index)], uv, 0.0); + float _e208 = texture_array_depth[NonUniformResourceIndex(non_uniform_index)].SampleCmp(nagaComparisonSamplerHeap[NonUniformResourceIndex(nagaGroup0SamplerIndexArray[samp_comp + non_uniform_index])], uv, 0.0); float _e209 = v1_; v1_ = (_e209 + _e208); - float _e216 = texture_array_depth[0].SampleCmpLevelZero(samp_comp[0], uv, 0.0); + float _e216 = texture_array_depth[0].SampleCmpLevelZero(nagaComparisonSamplerHeap[nagaGroup0SamplerIndexArray[samp_comp + 0]], uv, 0.0); float _e217 = v1_; v1_ = (_e217 + _e216); - float _e224 = texture_array_depth[uniform_index].SampleCmpLevelZero(samp_comp[uniform_index], uv, 0.0); + float _e224 = texture_array_depth[uniform_index].SampleCmpLevelZero(nagaComparisonSamplerHeap[nagaGroup0SamplerIndexArray[samp_comp + uniform_index]], uv, 0.0); float _e225 = v1_; v1_ = (_e225 + _e224); - float _e232 = texture_array_depth[NonUniformResourceIndex(non_uniform_index)].SampleCmpLevelZero(samp_comp[NonUniformResourceIndex(non_uniform_index)], uv, 0.0); + 
float _e232 = texture_array_depth[NonUniformResourceIndex(non_uniform_index)].SampleCmpLevelZero(nagaComparisonSamplerHeap[NonUniformResourceIndex(nagaGroup0SamplerIndexArray[samp_comp + non_uniform_index])], uv, 0.0); float _e233 = v1_; v1_ = (_e233 + _e232); - float4 _e239 = texture_array_bounded[0].SampleGrad(samp[0], uv, uv, uv); + float4 _e239 = texture_array_bounded[0].SampleGrad(nagaSamplerHeap[nagaGroup0SamplerIndexArray[samp + 0]], uv, uv, uv); float4 _e240 = v4_; v4_ = (_e240 + _e239); - float4 _e246 = texture_array_bounded[uniform_index].SampleGrad(samp[uniform_index], uv, uv, uv); + float4 _e246 = texture_array_bounded[uniform_index].SampleGrad(nagaSamplerHeap[nagaGroup0SamplerIndexArray[samp + uniform_index]], uv, uv, uv); float4 _e247 = v4_; v4_ = (_e247 + _e246); - float4 _e253 = texture_array_bounded[NonUniformResourceIndex(non_uniform_index)].SampleGrad(samp[NonUniformResourceIndex(non_uniform_index)], uv, uv, uv); + float4 _e253 = texture_array_bounded[NonUniformResourceIndex(non_uniform_index)].SampleGrad(nagaSamplerHeap[NonUniformResourceIndex(nagaGroup0SamplerIndexArray[samp + non_uniform_index])], uv, uv, uv); float4 _e254 = v4_; v4_ = (_e254 + _e253); - float4 _e261 = texture_array_bounded[0].SampleLevel(samp[0], uv, 0.0); + float4 _e261 = texture_array_bounded[0].SampleLevel(nagaSamplerHeap[nagaGroup0SamplerIndexArray[samp + 0]], uv, 0.0); float4 _e262 = v4_; v4_ = (_e262 + _e261); - float4 _e269 = texture_array_bounded[uniform_index].SampleLevel(samp[uniform_index], uv, 0.0); + float4 _e269 = texture_array_bounded[uniform_index].SampleLevel(nagaSamplerHeap[nagaGroup0SamplerIndexArray[samp + uniform_index]], uv, 0.0); float4 _e270 = v4_; v4_ = (_e270 + _e269); - float4 _e277 = texture_array_bounded[NonUniformResourceIndex(non_uniform_index)].SampleLevel(samp[NonUniformResourceIndex(non_uniform_index)], uv, 0.0); + float4 _e277 = texture_array_bounded[NonUniformResourceIndex(non_uniform_index)].SampleLevel(nagaSamplerHeap[NonUniformResourceIndex(nagaGroup0SamplerIndexArray[samp + non_uniform_index])], uv, 0.0); float4 _e278 = v4_; v4_ = (_e278 + _e277); float4 _e282 = v4_; diff --git a/naga/tests/out/hlsl/boids.hlsl b/naga/tests/out/hlsl/boids.hlsl index 22e9c6cefd..8934a9bca2 100644 --- a/naga/tests/out/hlsl/boids.hlsl +++ b/naga/tests/out/hlsl/boids.hlsl @@ -41,8 +41,11 @@ void main(uint3 global_invocation_id : SV_DispatchThreadID) vPos = _e8; float2 _e14 = asfloat(particlesSrc.Load2(8+index*16+0)); vVel = _e14; + uint2 loop_bound = uint2(0u, 0u); bool loop_init = true; while(true) { + if (all(loop_bound == uint2(4294967295u, 4294967295u))) { break; } + loop_bound += uint2(loop_bound.y == 4294967295u, 1u); if (!loop_init) { uint _e91 = i; i = (_e91 + 1u); diff --git a/naga/tests/out/hlsl/bounds-check-dynamic-buffer.hlsl b/naga/tests/out/hlsl/bounds-check-dynamic-buffer.hlsl new file mode 100644 index 0000000000..9e92c1ed6d --- /dev/null +++ b/naga/tests/out/hlsl/bounds-check-dynamic-buffer.hlsl @@ -0,0 +1,39 @@ +struct __dynamic_buffer_offsetsTy0 { + uint _0; + uint _1; +}; +ConstantBuffer<__dynamic_buffer_offsetsTy0> __dynamic_buffer_offsets0: register(b1, space0); + +struct __dynamic_buffer_offsetsTy1 { + uint _0; +}; +ConstantBuffer<__dynamic_buffer_offsetsTy1> __dynamic_buffer_offsets1: register(b2, space0); + +struct T { + uint t; + int _end_pad_0; + int _end_pad_1; + int _end_pad_2; +}; + +RWByteAddressBuffer in_ : register(u0); +RWByteAddressBuffer out_ : register(u1); +cbuffer in_data_uniform : register(b0) { T in_data_uniform[1]; } +RWByteAddressBuffer 
in_data_storage_g0_b3_ : register(u2); +RWByteAddressBuffer in_data_storage_g0_b4_ : register(u3); +RWByteAddressBuffer in_data_storage_g1_b0_ : register(u4); + +[numthreads(1, 1, 1)] +void main() +{ + uint i = asuint(in_.Load(0)); + uint _e7 = in_data_uniform[min(uint(i), 0u)].t; + out_.Store(0, asuint(_e7)); + uint _e13 = asuint(in_data_storage_g0_b3_.Load(0+i*16+__dynamic_buffer_offsets0._0)); + out_.Store(4, asuint(_e13)); + uint _e19 = asuint(in_data_storage_g0_b4_.Load(0+i*16+__dynamic_buffer_offsets0._1)); + out_.Store(8, asuint(_e19)); + uint _e25 = asuint(in_data_storage_g1_b0_.Load(0+i*16+__dynamic_buffer_offsets1._0)); + out_.Store(12, asuint(_e25)); + return; +} diff --git a/naga/tests/out/hlsl/bounds-check-dynamic-buffer.ron b/naga/tests/out/hlsl/bounds-check-dynamic-buffer.ron new file mode 100644 index 0000000000..a07b03300b --- /dev/null +++ b/naga/tests/out/hlsl/bounds-check-dynamic-buffer.ron @@ -0,0 +1,12 @@ +( + vertex:[ + ], + fragment:[ + ], + compute:[ + ( + entry_point:"main", + target_profile:"cs_5_1", + ), + ], +) diff --git a/naga/tests/out/hlsl/break-if.hlsl b/naga/tests/out/hlsl/break-if.hlsl index 63a0185583..cb10886543 100644 --- a/naga/tests/out/hlsl/break-if.hlsl +++ b/naga/tests/out/hlsl/break-if.hlsl @@ -1,7 +1,10 @@ void breakIfEmpty() { + uint2 loop_bound = uint2(0u, 0u); bool loop_init = true; while(true) { + if (all(loop_bound == uint2(4294967295u, 4294967295u))) { break; } + loop_bound += uint2(loop_bound.y == 4294967295u, 1u); if (!loop_init) { if (true) { break; @@ -17,8 +20,11 @@ void breakIfEmptyBody(bool a) bool b = (bool)0; bool c = (bool)0; + uint2 loop_bound_1 = uint2(0u, 0u); bool loop_init_1 = true; while(true) { + if (all(loop_bound_1 == uint2(4294967295u, 4294967295u))) { break; } + loop_bound_1 += uint2(loop_bound_1.y == 4294967295u, 1u); if (!loop_init_1) { b = a; bool _e2 = b; @@ -38,8 +44,11 @@ void breakIf(bool a_1) bool d = (bool)0; bool e = (bool)0; + uint2 loop_bound_2 = uint2(0u, 0u); bool loop_init_2 = true; while(true) { + if (all(loop_bound_2 == uint2(4294967295u, 4294967295u))) { break; } + loop_bound_2 += uint2(loop_bound_2.y == 4294967295u, 1u); if (!loop_init_2) { bool _e5 = e; if ((a_1 == _e5)) { @@ -58,8 +67,11 @@ void breakIfSeparateVariable() { uint counter = 0u; + uint2 loop_bound_3 = uint2(0u, 0u); bool loop_init_3 = true; while(true) { + if (all(loop_bound_3 == uint2(4294967295u, 4294967295u))) { break; } + loop_bound_3 += uint2(loop_bound_3.y == 4294967295u, 1u); if (!loop_init_3) { uint _e5 = counter; if ((_e5 == 5u)) { diff --git a/naga/tests/out/hlsl/collatz.hlsl b/naga/tests/out/hlsl/collatz.hlsl index b00586aa4c..3a250a4f25 100644 --- a/naga/tests/out/hlsl/collatz.hlsl +++ b/naga/tests/out/hlsl/collatz.hlsl @@ -6,7 +6,10 @@ uint collatz_iterations(uint n_base) uint i = 0u; n = n_base; + uint2 loop_bound = uint2(0u, 0u); while(true) { + if (all(loop_bound == uint2(4294967295u, 4294967295u))) { break; } + loop_bound += uint2(loop_bound.y == 4294967295u, 1u); uint _e4 = n; if ((_e4 > 1u)) { } else { diff --git a/naga/tests/out/hlsl/const-exprs.hlsl b/naga/tests/out/hlsl/const-exprs.hlsl index 29bec5f17a..aa2ba75ed6 100644 --- a/naga/tests/out/hlsl/const-exprs.hlsl +++ b/naga/tests/out/hlsl/const-exprs.hlsl @@ -17,18 +17,21 @@ void swizzle_of_compose() { int4 out_ = int4(4, 3, 2, 1); + return; } void index_of_compose() { int out_1 = 2; + return; } void compose_three_deep() { int out_2 = 6; + return; } void non_constant_initializers() @@ -55,18 +58,21 @@ void splat_of_constant() { int4 out_4 = int4(-4, -4, -4, -4); 
+ return; } void compose_of_constant() { int4 out_5 = int4(-4, -4, -4, -4); + return; } void compose_of_splat() { float4 x_1 = float4(2.0, 1.0, 1.0, 1.0); + return; } uint map_texture_kind(int texture_kind) diff --git a/naga/tests/out/hlsl/constructors.hlsl b/naga/tests/out/hlsl/constructors.hlsl index 90d8db9a33..c354737114 100644 --- a/naga/tests/out/hlsl/constructors.hlsl +++ b/naga/tests/out/hlsl/constructors.hlsl @@ -88,4 +88,5 @@ void main() int cit2_[4] = Constructarray4_int_(0, 1, 2, 3); uint2 ic4_ = uint2(0u, 0u); float2x3 ic5_ = float2x3(float3(0.0, 0.0, 0.0), float3(0.0, 0.0, 0.0)); + return; } diff --git a/naga/tests/out/hlsl/control-flow.hlsl b/naga/tests/out/hlsl/control-flow.hlsl index 2438858a8a..7a8cb73779 100644 --- a/naga/tests/out/hlsl/control-flow.hlsl +++ b/naga/tests/out/hlsl/control-flow.hlsl @@ -20,7 +20,10 @@ void switch_case_break() void loop_switch_continue(int x) { + uint2 loop_bound = uint2(0u, 0u); while(true) { + if (all(loop_bound == uint2(4294967295u, 4294967295u))) { break; } + loop_bound += uint2(loop_bound.y == 4294967295u, 1u); bool should_continue = false; switch(x) { case 1: { @@ -40,7 +43,10 @@ void loop_switch_continue(int x) void loop_switch_continue_nesting(int x_1, int y, int z) { + uint2 loop_bound_1 = uint2(0u, 0u); while(true) { + if (all(loop_bound_1 == uint2(4294967295u, 4294967295u))) { break; } + loop_bound_1 += uint2(loop_bound_1.y == 4294967295u, 1u); bool should_continue_1 = false; switch(x_1) { case 1: { @@ -54,7 +60,10 @@ void loop_switch_continue_nesting(int x_1, int y, int z) break; } default: { + uint2 loop_bound_2 = uint2(0u, 0u); while(true) { + if (all(loop_bound_2 == uint2(4294967295u, 4294967295u))) { break; } + loop_bound_2 += uint2(loop_bound_2.y == 4294967295u, 1u); bool should_continue_2 = false; switch(z) { case 1: { @@ -93,7 +102,10 @@ void loop_switch_continue_nesting(int x_1, int y, int z) continue; } } + uint2 loop_bound_3 = uint2(0u, 0u); while(true) { + if (all(loop_bound_3 == uint2(4294967295u, 4294967295u))) { break; } + loop_bound_3 += uint2(loop_bound_3.y == 4294967295u, 1u); bool should_continue_4 = false; do { do { @@ -115,7 +127,10 @@ void loop_switch_omit_continue_variable_checks(int x_2, int y_1, int z_1, int w) { int pos_1 = 0; + uint2 loop_bound_4 = uint2(0u, 0u); while(true) { + if (all(loop_bound_4 == uint2(4294967295u, 4294967295u))) { break; } + loop_bound_4 += uint2(loop_bound_4.y == 4294967295u, 1u); bool should_continue_5 = false; switch(x_2) { case 1: { @@ -127,7 +142,10 @@ void loop_switch_omit_continue_variable_checks(int x_2, int y_1, int z_1, int w) } } } + uint2 loop_bound_5 = uint2(0u, 0u); while(true) { + if (all(loop_bound_5 == uint2(4294967295u, 4294967295u))) { break; } + loop_bound_5 += uint2(loop_bound_5.y == 4294967295u, 1u); bool should_continue_6 = false; switch(x_2) { case 1: { diff --git a/naga/tests/out/hlsl/cross.hlsl b/naga/tests/out/hlsl/cross.hlsl index 96696c5066..8d78e02e49 100644 --- a/naga/tests/out/hlsl/cross.hlsl +++ b/naga/tests/out/hlsl/cross.hlsl @@ -2,4 +2,5 @@ void main() { float3 a = cross(float3(0.0, 1.0, 2.0), float3(0.0, 1.0, 2.0)); + return; } diff --git a/naga/tests/out/hlsl/do-while.hlsl b/naga/tests/out/hlsl/do-while.hlsl index ca7d42e1e7..659624da79 100644 --- a/naga/tests/out/hlsl/do-while.hlsl +++ b/naga/tests/out/hlsl/do-while.hlsl @@ -1,7 +1,10 @@ void fb1_(inout bool cond) { + uint2 loop_bound = uint2(0u, 0u); bool loop_init = true; while(true) { + if (all(loop_bound == uint2(4294967295u, 4294967295u))) { break; } + loop_bound += uint2(loop_bound.y 
== 4294967295u, 1u); if (!loop_init) { bool _e1 = cond; if (!(_e1)) { diff --git a/naga/tests/out/hlsl/globals.hlsl b/naga/tests/out/hlsl/globals.hlsl index d6d8eb4107..d320899b89 100644 --- a/naga/tests/out/hlsl/globals.hlsl +++ b/naga/tests/out/hlsl/globals.hlsl @@ -99,6 +99,7 @@ void test_msl_packed_vec3_() float3 mvm1_ = mul(data.v3_, ZeroValuefloat3x3()); float3 svm0_ = (data.v3_ * 2.0); float3 svm1_ = (2.0 * data.v3_); + return; } uint NagaBufferLength(ByteAddressBuffer buffer) diff --git a/naga/tests/out/hlsl/image.hlsl b/naga/tests/out/hlsl/image.hlsl index 84e9f6d706..30bfd86aeb 100644 --- a/naga/tests/out/hlsl/image.hlsl +++ b/naga/tests/out/hlsl/image.hlsl @@ -15,8 +15,11 @@ TextureCube image_cube : register(t5); TextureCubeArray image_cube_array : register(t6); Texture3D image_3d : register(t7); Texture2DMS image_aa : register(t8); -SamplerState sampler_reg : register(s0, space1); -SamplerComparisonState sampler_cmp : register(s1, space1); +SamplerState nagaSamplerHeap[2048]: register(s0, space0); +SamplerComparisonState nagaComparisonSamplerHeap[2048]: register(s0, space1); +StructuredBuffer nagaGroup1SamplerIndexArray : register(t1, space255); +static const SamplerState sampler_reg = nagaSamplerHeap[nagaGroup1SamplerIndexArray[0]]; +static const SamplerComparisonState sampler_cmp = nagaComparisonSamplerHeap[nagaGroup1SamplerIndexArray[1]]; Texture2D image_2d_depth : register(t2, space1); Texture2DArray image_2d_array_depth : register(t3, space1); TextureCube image_cube_depth : register(t4, space1); diff --git a/naga/tests/out/hlsl/math-functions.hlsl b/naga/tests/out/hlsl/math-functions.hlsl index a94b5062a6..b8ace15b3c 100644 --- a/naga/tests/out/hlsl/math-functions.hlsl +++ b/naga/tests/out/hlsl/math-functions.hlsl @@ -105,4 +105,5 @@ void main() float2 quantizeToF16_b = f16tof32(f32tof16(float2(1.0, 1.0))); float3 quantizeToF16_c = f16tof32(f32tof16(float3(1.0, 1.0, 1.0))); float4 quantizeToF16_d = f16tof32(f32tof16(float4(1.0, 1.0, 1.0, 1.0))); + return; } diff --git a/naga/tests/out/hlsl/operators.hlsl b/naga/tests/out/hlsl/operators.hlsl index 7d9dc8f401..a4a381f7e7 100644 --- a/naga/tests/out/hlsl/operators.hlsl +++ b/naga/tests/out/hlsl/operators.hlsl @@ -53,6 +53,7 @@ void logical() bool3 bitwise_or1_ = ((true).xxx | (false).xxx); bool bitwise_and0_ = (true & false); bool4 bitwise_and1_ = ((true).xxxx & (false).xxxx); + return; } float3x3 ZeroValuefloat3x3() { @@ -141,6 +142,7 @@ void arithmetic() float3 mul_vector0_ = mul((1.0).xxxx, ZeroValuefloat4x3()); float4 mul_vector1_ = mul(ZeroValuefloat4x3(), (2.0).xxx); float3x3 mul_ = mul(ZeroValuefloat3x4(), ZeroValuefloat4x3()); + return; } void bit() @@ -169,6 +171,7 @@ void bit() uint shr1_ = (2u >> 1u); int2 shr2_ = ((2).xx >> (1u).xx); uint3 shr3_ = ((2u).xxx >> (1u).xxx); + return; } void comparison() @@ -209,6 +212,7 @@ void comparison() bool2 gte3_ = ((2).xx >= (1).xx); bool3 gte4_ = ((2u).xxx >= (1u).xxx); bool4 gte5_ = ((2.0).xxxx >= (1.0).xxxx); + return; } int3 ZeroValueint3() { @@ -264,6 +268,7 @@ void negation_avoids_prefix_decrement() int p5_ = -(-(-(-(1)))); int p6_ = -(-(-(-(-(1))))); int p7_ = -(-(-(-(-(1))))); + return; } [numthreads(1, 1, 1)] diff --git a/naga/tests/out/hlsl/phony_assignment.hlsl b/naga/tests/out/hlsl/phony_assignment.hlsl index f448290e0e..dbe587e5c8 100644 --- a/naga/tests/out/hlsl/phony_assignment.hlsl +++ b/naga/tests/out/hlsl/phony_assignment.hlsl @@ -13,4 +13,5 @@ void main(uint3 id : SV_DispatchThreadID) const int _e6 = five(); const int _e7 = five(); float phony_2 = binding; + 
return; } diff --git a/naga/tests/out/hlsl/quad.hlsl b/naga/tests/out/hlsl/quad.hlsl index 5bd8530c68..580182a7de 100644 --- a/naga/tests/out/hlsl/quad.hlsl +++ b/naga/tests/out/hlsl/quad.hlsl @@ -6,7 +6,10 @@ struct VertexOutput { static const float c_scale = 1.2; Texture2D u_texture : register(t0); -SamplerState u_sampler : register(s1); +SamplerState nagaSamplerHeap[2048]: register(s0, space0); +SamplerComparisonState nagaComparisonSamplerHeap[2048]: register(s0, space1); +StructuredBuffer nagaGroup0SamplerIndexArray : register(t0, space255); +static const SamplerState u_sampler = nagaSamplerHeap[nagaGroup0SamplerIndexArray[1]]; struct VertexOutput_vert_main { float2 uv_2 : LOC0; diff --git a/naga/tests/out/hlsl/ray-query.hlsl b/naga/tests/out/hlsl/ray-query.hlsl index 9a0a2da1ce..69c616ef87 100644 --- a/naga/tests/out/hlsl/ray-query.hlsl +++ b/naga/tests/out/hlsl/ray-query.hlsl @@ -1,8 +1,8 @@ struct RayIntersection { uint kind; float t; - uint instance_custom_index; - uint instance_id; + uint instance_custom_data; + uint instance_index; uint sbt_record_offset; uint geometry_index; uint primitive_index; @@ -64,8 +64,8 @@ RayIntersection GetCommittedIntersection(RayQuery rq) { ret.kind = rq.CommittedStatus(); if( rq.CommittedStatus() == COMMITTED_NOTHING) {} else { ret.t = rq.CommittedRayT(); - ret.instance_custom_index = rq.CommittedInstanceID(); - ret.instance_id = rq.CommittedInstanceIndex(); + ret.instance_custom_data = rq.CommittedInstanceID(); + ret.instance_index = rq.CommittedInstanceIndex(); ret.sbt_record_offset = rq.CommittedInstanceContributionToHitGroupIndex(); ret.geometry_index = rq.CommittedGeometryIndex(); ret.primitive_index = rq.CommittedPrimitiveIndex(); @@ -84,7 +84,10 @@ RayIntersection query_loop(float3 pos, float3 dir, RaytracingAccelerationStructu RayQuery rq_1; rq_1.TraceRayInline(acs, ConstructRayDesc_(4u, 255u, 0.1, 100.0, pos, dir).flags, ConstructRayDesc_(4u, 255u, 0.1, 100.0, pos, dir).cull_mask, RayDescFromRayDesc_(ConstructRayDesc_(4u, 255u, 0.1, 100.0, pos, dir))); + uint2 loop_bound = uint2(0u, 0u); while(true) { + if (all(loop_bound == uint2(4294967295u, 4294967295u))) { break; } + loop_bound += uint2(loop_bound.y == 4294967295u, 1u); const bool _e9 = rq_1.Proceed(); if (_e9) { } else { @@ -128,8 +131,8 @@ RayIntersection GetCandidateIntersection(RayQuery rq) { } else { ret.kind = 3; } - ret.instance_custom_index = rq.CandidateInstanceID(); - ret.instance_id = rq.CandidateInstanceIndex(); + ret.instance_custom_data = rq.CandidateInstanceID(); + ret.instance_index = rq.CandidateInstanceIndex(); ret.sbt_record_offset = rq.CandidateInstanceContributionToHitGroupIndex(); ret.geometry_index = rq.CandidateGeometryIndex(); ret.primitive_index = rq.CandidatePrimitiveIndex(); diff --git a/naga/tests/out/hlsl/shadow.hlsl b/naga/tests/out/hlsl/shadow.hlsl index c0431bfef9..2508a701fa 100644 --- a/naga/tests/out/hlsl/shadow.hlsl +++ b/naga/tests/out/hlsl/shadow.hlsl @@ -28,7 +28,10 @@ cbuffer u_entity : register(b0, space1) { Entity u_entity; } ByteAddressBuffer s_lights : register(t1); cbuffer u_lights : register(b1) { Light u_lights[10]; } Texture2DArray t_shadow : register(t2); -SamplerComparisonState sampler_shadow : register(s3); +SamplerState nagaSamplerHeap[2048]: register(s0, space0); +SamplerComparisonState nagaComparisonSamplerHeap[2048]: register(s0, space1); +StructuredBuffer nagaGroup0SamplerIndexArray : register(t0, space255); +static const SamplerComparisonState sampler_shadow = nagaComparisonSamplerHeap[nagaGroup0SamplerIndexArray[3]]; struct 
VertexOutput_vs_main { float3 world_normal : LOC0; @@ -92,8 +95,11 @@ float4 fs_main(FragmentInput_fs_main fragmentinput_fs_main) : SV_Target0 uint i = 0u; float3 normal_1 = normalize(in_.world_normal); + uint2 loop_bound = uint2(0u, 0u); bool loop_init = true; while(true) { + if (all(loop_bound == uint2(4294967295u, 4294967295u))) { break; } + loop_bound += uint2(loop_bound.y == 4294967295u, 1u); if (!loop_init) { uint _e40 = i; i = (_e40 + 1u); @@ -128,8 +134,11 @@ float4 fs_main_without_storage(FragmentInput_fs_main_without_storage fragmentinp uint i_1 = 0u; float3 normal_2 = normalize(in_1.world_normal); + uint2 loop_bound_1 = uint2(0u, 0u); bool loop_init_1 = true; while(true) { + if (all(loop_bound_1 == uint2(4294967295u, 4294967295u))) { break; } + loop_bound_1 += uint2(loop_bound_1.y == 4294967295u, 1u); if (!loop_init_1) { uint _e40 = i_1; i_1 = (_e40 + 1u); diff --git a/naga/tests/out/hlsl/skybox.hlsl b/naga/tests/out/hlsl/skybox.hlsl index f33cc461a2..f1d77c3c50 100644 --- a/naga/tests/out/hlsl/skybox.hlsl +++ b/naga/tests/out/hlsl/skybox.hlsl @@ -17,7 +17,10 @@ struct Data { cbuffer r_data : register(b0) { Data r_data; } TextureCube r_texture : register(t0); -SamplerState r_sampler : register(s0, space1); +SamplerState nagaSamplerHeap[2048]: register(s0, space0); +SamplerComparisonState nagaComparisonSamplerHeap[2048]: register(s0, space1); +StructuredBuffer nagaGroup0SamplerIndexArray : register(t0, space2); +static const SamplerState r_sampler = nagaSamplerHeap[nagaGroup0SamplerIndexArray[0]]; struct VertexOutput_vs_main { float3 uv : LOC0; diff --git a/naga/tests/out/hlsl/storage-textures.hlsl b/naga/tests/out/hlsl/storage-textures.hlsl new file mode 100644 index 0000000000..b5aa2f47f2 --- /dev/null +++ b/naga/tests/out/hlsl/storage-textures.hlsl @@ -0,0 +1,24 @@ +RWTexture2D s_r_r : register(u0); +RWTexture2D s_rg_r : register(u1); +RWTexture2D s_rgba_r : register(u2); +RWTexture2D s_r_w : register(u0, space1); +RWTexture2D s_rg_w : register(u1, space1); +RWTexture2D s_rgba_w : register(u2, space1); + +[numthreads(1, 1, 1)] +void csLoad() +{ + float4 phony = s_r_r.Load((0u).xx); + float4 phony_1 = s_rg_r.Load((0u).xx); + float4 phony_2 = s_rgba_r.Load((0u).xx); + return; +} + +[numthreads(1, 1, 1)] +void csStore() +{ + s_r_w[(0u).xx] = (0.0).xxxx; + s_rg_w[(0u).xx] = (0.0).xxxx; + s_rgba_w[(0u).xx] = (0.0).xxxx; + return; +} diff --git a/naga/tests/out/hlsl/storage-textures.ron b/naga/tests/out/hlsl/storage-textures.ron new file mode 100644 index 0000000000..b07b4fb7aa --- /dev/null +++ b/naga/tests/out/hlsl/storage-textures.ron @@ -0,0 +1,16 @@ +( + vertex:[ + ], + fragment:[ + ], + compute:[ + ( + entry_point:"csLoad", + target_profile:"cs_5_1", + ), + ( + entry_point:"csStore", + target_profile:"cs_5_1", + ), + ], +) diff --git a/naga/tests/out/hlsl/texture-arg.hlsl b/naga/tests/out/hlsl/texture-arg.hlsl index 14971d6b3f..a2f24e626f 100644 --- a/naga/tests/out/hlsl/texture-arg.hlsl +++ b/naga/tests/out/hlsl/texture-arg.hlsl @@ -1,5 +1,8 @@ Texture2D Texture : register(t0); -SamplerState Sampler : register(s1); +SamplerState nagaSamplerHeap[2048]: register(s0, space0); +SamplerComparisonState nagaComparisonSamplerHeap[2048]: register(s0, space1); +StructuredBuffer nagaGroup0SamplerIndexArray : register(t0, space255); +static const SamplerState Sampler = nagaSamplerHeap[nagaGroup0SamplerIndexArray[1]]; float4 test(Texture2D Passed_Texture, SamplerState Passed_Sampler) { diff --git a/naga/tests/out/ir/access.compact.ron b/naga/tests/out/ir/access.compact.ron index 
db21819dd9..3963766551 100644 --- a/naga/tests/out/ir/access.compact.ron +++ b/naga/tests/out/ir/access.compact.ron @@ -353,6 +353,69 @@ space: Function, ), ), + ( + name: None, + inner: Scalar(( + kind: Bool, + width: 1, + )), + ), + ( + name: None, + inner: Array( + base: 33, + size: Constant(1), + stride: 1, + ), + ), + ( + name: Some("S"), + inner: Struct( + members: [ + ( + name: Some("m"), + ty: 2, + binding: None, + offset: 0, + ), + ], + span: 4, + ), + ), + ( + name: Some("Inner"), + inner: Struct( + members: [ + ( + name: Some("delicious"), + ty: 2, + binding: None, + offset: 0, + ), + ], + span: 4, + ), + ), + ( + name: Some("Outer"), + inner: Struct( + members: [ + ( + name: Some("om_nom_nom"), + ty: 36, + binding: None, + offset: 0, + ), + ( + name: Some("thing"), + ty: 0, + binding: None, + offset: 4, + ), + ], + span: 8, + ), + ), ], special_types: ( ray_desc: None, @@ -1848,6 +1911,301 @@ ], diagnostic_filter_leaf: None, ), + ( + name: Some("index_ptr"), + arguments: [ + ( + name: Some("value"), + ty: 33, + binding: None, + ), + ], + result: Some(( + ty: 33, + binding: None, + )), + local_variables: [ + ( + name: Some("a"), + ty: 34, + init: None, + ), + ], + expressions: [ + FunctionArgument(0), + Compose( + ty: 34, + components: [ + 0, + ], + ), + LocalVariable(0), + AccessIndex( + base: 2, + index: 0, + ), + Load( + pointer: 3, + ), + ], + named_expressions: { + 0: "value", + 2: "p", + }, + body: [ + Emit(( + start: 1, + end: 2, + )), + Store( + pointer: 2, + value: 1, + ), + Emit(( + start: 3, + end: 5, + )), + Return( + value: Some(4), + ), + ], + diagnostic_filter_leaf: None, + ), + ( + name: Some("member_ptr"), + arguments: [], + result: Some(( + ty: 2, + binding: None, + )), + local_variables: [ + ( + name: Some("s"), + ty: 35, + init: Some(1), + ), + ], + expressions: [ + Literal(I32(42)), + Compose( + ty: 35, + components: [ + 0, + ], + ), + LocalVariable(0), + AccessIndex( + base: 2, + index: 0, + ), + Load( + pointer: 3, + ), + ], + named_expressions: { + 2: "p", + }, + body: [ + Emit(( + start: 1, + end: 2, + )), + Emit(( + start: 3, + end: 5, + )), + Return( + value: Some(4), + ), + ], + diagnostic_filter_leaf: None, + ), + ( + name: Some("let_members_of_members"), + arguments: [], + result: Some(( + ty: 2, + binding: None, + )), + local_variables: [], + expressions: [ + ZeroValue(37), + AccessIndex( + base: 0, + index: 0, + ), + AccessIndex( + base: 1, + index: 0, + ), + AccessIndex( + base: 0, + index: 1, + ), + As( + expr: 2, + kind: Uint, + convert: Some(4), + ), + Binary( + op: NotEqual, + left: 3, + right: 4, + ), + AccessIndex( + base: 0, + index: 0, + ), + AccessIndex( + base: 6, + index: 0, + ), + ], + named_expressions: { + 0: "thing", + 1: "inner", + 2: "delishus", + }, + body: [ + Emit(( + start: 1, + end: 2, + )), + Emit(( + start: 2, + end: 3, + )), + Emit(( + start: 3, + end: 6, + )), + If( + condition: 5, + accept: [], + reject: [], + ), + Emit(( + start: 6, + end: 8, + )), + Return( + value: Some(7), + ), + ], + diagnostic_filter_leaf: None, + ), + ( + name: Some("var_members_of_members"), + arguments: [], + result: Some(( + ty: 2, + binding: None, + )), + local_variables: [ + ( + name: Some("thing"), + ty: 37, + init: Some(0), + ), + ( + name: Some("inner"), + ty: 36, + init: None, + ), + ( + name: Some("delishus"), + ty: 2, + init: None, + ), + ], + expressions: [ + ZeroValue(37), + LocalVariable(0), + AccessIndex( + base: 1, + index: 0, + ), + Load( + pointer: 2, + ), + LocalVariable(1), + AccessIndex( + base: 4, + index: 0, + ), + Load( + 
pointer: 5, + ), + LocalVariable(2), + AccessIndex( + base: 1, + index: 1, + ), + Load( + pointer: 8, + ), + Load( + pointer: 7, + ), + As( + expr: 10, + kind: Uint, + convert: Some(4), + ), + Binary( + op: NotEqual, + left: 9, + right: 11, + ), + AccessIndex( + base: 1, + index: 0, + ), + AccessIndex( + base: 13, + index: 0, + ), + Load( + pointer: 14, + ), + ], + named_expressions: {}, + body: [ + Emit(( + start: 2, + end: 4, + )), + Store( + pointer: 4, + value: 3, + ), + Emit(( + start: 5, + end: 7, + )), + Store( + pointer: 7, + value: 6, + ), + Emit(( + start: 8, + end: 13, + )), + If( + condition: 12, + accept: [], + reject: [], + ), + Emit(( + start: 13, + end: 16, + )), + Return( + value: Some(15), + ), + ], + diagnostic_filter_leaf: None, + ), ], entry_points: [ ( diff --git a/naga/tests/out/ir/access.ron b/naga/tests/out/ir/access.ron index db21819dd9..3963766551 100644 --- a/naga/tests/out/ir/access.ron +++ b/naga/tests/out/ir/access.ron @@ -353,6 +353,69 @@ space: Function, ), ), + ( + name: None, + inner: Scalar(( + kind: Bool, + width: 1, + )), + ), + ( + name: None, + inner: Array( + base: 33, + size: Constant(1), + stride: 1, + ), + ), + ( + name: Some("S"), + inner: Struct( + members: [ + ( + name: Some("m"), + ty: 2, + binding: None, + offset: 0, + ), + ], + span: 4, + ), + ), + ( + name: Some("Inner"), + inner: Struct( + members: [ + ( + name: Some("delicious"), + ty: 2, + binding: None, + offset: 0, + ), + ], + span: 4, + ), + ), + ( + name: Some("Outer"), + inner: Struct( + members: [ + ( + name: Some("om_nom_nom"), + ty: 36, + binding: None, + offset: 0, + ), + ( + name: Some("thing"), + ty: 0, + binding: None, + offset: 4, + ), + ], + span: 8, + ), + ), ], special_types: ( ray_desc: None, @@ -1848,6 +1911,301 @@ ], diagnostic_filter_leaf: None, ), + ( + name: Some("index_ptr"), + arguments: [ + ( + name: Some("value"), + ty: 33, + binding: None, + ), + ], + result: Some(( + ty: 33, + binding: None, + )), + local_variables: [ + ( + name: Some("a"), + ty: 34, + init: None, + ), + ], + expressions: [ + FunctionArgument(0), + Compose( + ty: 34, + components: [ + 0, + ], + ), + LocalVariable(0), + AccessIndex( + base: 2, + index: 0, + ), + Load( + pointer: 3, + ), + ], + named_expressions: { + 0: "value", + 2: "p", + }, + body: [ + Emit(( + start: 1, + end: 2, + )), + Store( + pointer: 2, + value: 1, + ), + Emit(( + start: 3, + end: 5, + )), + Return( + value: Some(4), + ), + ], + diagnostic_filter_leaf: None, + ), + ( + name: Some("member_ptr"), + arguments: [], + result: Some(( + ty: 2, + binding: None, + )), + local_variables: [ + ( + name: Some("s"), + ty: 35, + init: Some(1), + ), + ], + expressions: [ + Literal(I32(42)), + Compose( + ty: 35, + components: [ + 0, + ], + ), + LocalVariable(0), + AccessIndex( + base: 2, + index: 0, + ), + Load( + pointer: 3, + ), + ], + named_expressions: { + 2: "p", + }, + body: [ + Emit(( + start: 1, + end: 2, + )), + Emit(( + start: 3, + end: 5, + )), + Return( + value: Some(4), + ), + ], + diagnostic_filter_leaf: None, + ), + ( + name: Some("let_members_of_members"), + arguments: [], + result: Some(( + ty: 2, + binding: None, + )), + local_variables: [], + expressions: [ + ZeroValue(37), + AccessIndex( + base: 0, + index: 0, + ), + AccessIndex( + base: 1, + index: 0, + ), + AccessIndex( + base: 0, + index: 1, + ), + As( + expr: 2, + kind: Uint, + convert: Some(4), + ), + Binary( + op: NotEqual, + left: 3, + right: 4, + ), + AccessIndex( + base: 0, + index: 0, + ), + AccessIndex( + base: 6, + index: 0, + ), + ], + 
named_expressions: { + 0: "thing", + 1: "inner", + 2: "delishus", + }, + body: [ + Emit(( + start: 1, + end: 2, + )), + Emit(( + start: 2, + end: 3, + )), + Emit(( + start: 3, + end: 6, + )), + If( + condition: 5, + accept: [], + reject: [], + ), + Emit(( + start: 6, + end: 8, + )), + Return( + value: Some(7), + ), + ], + diagnostic_filter_leaf: None, + ), + ( + name: Some("var_members_of_members"), + arguments: [], + result: Some(( + ty: 2, + binding: None, + )), + local_variables: [ + ( + name: Some("thing"), + ty: 37, + init: Some(0), + ), + ( + name: Some("inner"), + ty: 36, + init: None, + ), + ( + name: Some("delishus"), + ty: 2, + init: None, + ), + ], + expressions: [ + ZeroValue(37), + LocalVariable(0), + AccessIndex( + base: 1, + index: 0, + ), + Load( + pointer: 2, + ), + LocalVariable(1), + AccessIndex( + base: 4, + index: 0, + ), + Load( + pointer: 5, + ), + LocalVariable(2), + AccessIndex( + base: 1, + index: 1, + ), + Load( + pointer: 8, + ), + Load( + pointer: 7, + ), + As( + expr: 10, + kind: Uint, + convert: Some(4), + ), + Binary( + op: NotEqual, + left: 9, + right: 11, + ), + AccessIndex( + base: 1, + index: 0, + ), + AccessIndex( + base: 13, + index: 0, + ), + Load( + pointer: 14, + ), + ], + named_expressions: {}, + body: [ + Emit(( + start: 2, + end: 4, + )), + Store( + pointer: 4, + value: 3, + ), + Emit(( + start: 5, + end: 7, + )), + Store( + pointer: 7, + value: 6, + ), + Emit(( + start: 8, + end: 13, + )), + If( + condition: 12, + accept: [], + reject: [], + ), + Emit(( + start: 13, + end: 16, + )), + Return( + value: Some(15), + ), + ], + diagnostic_filter_leaf: None, + ), ], entry_points: [ ( diff --git a/naga/tests/out/ir/local-const.compact.ron b/naga/tests/out/ir/local-const.compact.ron index 154ce10dca..dabe3d3dec 100644 --- a/naga/tests/out/ir/local-const.compact.ron +++ b/naga/tests/out/ir/local-const.compact.ron @@ -133,6 +133,9 @@ start: 4, end: 5, )), + Return( + value: None, + ), ], diagnostic_filter_leaf: None, ), diff --git a/naga/tests/out/ir/local-const.ron b/naga/tests/out/ir/local-const.ron index 154ce10dca..dabe3d3dec 100644 --- a/naga/tests/out/ir/local-const.ron +++ b/naga/tests/out/ir/local-const.ron @@ -133,6 +133,9 @@ start: 4, end: 5, )), + Return( + value: None, + ), ], diagnostic_filter_leaf: None, ), diff --git a/naga/tests/out/ir/must-use.compact.ron b/naga/tests/out/ir/must-use.compact.ron new file mode 100644 index 0000000000..3d51cb0c95 --- /dev/null +++ b/naga/tests/out/ir/must-use.compact.ron @@ -0,0 +1,181 @@ +( + types: [ + ( + name: None, + inner: Scalar(( + kind: Sint, + width: 4, + )), + ), + ], + special_types: ( + ray_desc: None, + ray_intersection: None, + predeclared_types: {}, + ), + constants: [], + overrides: [], + global_variables: [], + global_expressions: [], + functions: [ + ( + name: Some("use_me"), + arguments: [], + result: Some(( + ty: 0, + binding: None, + )), + local_variables: [], + expressions: [ + Literal(I32(10)), + ], + named_expressions: {}, + body: [ + Return( + value: Some(0), + ), + ], + diagnostic_filter_leaf: None, + ), + ( + name: Some("use_return"), + arguments: [], + result: Some(( + ty: 0, + binding: None, + )), + local_variables: [], + expressions: [ + CallResult(0), + ], + named_expressions: {}, + body: [ + Call( + function: 0, + arguments: [], + result: Some(0), + ), + Return( + value: Some(0), + ), + ], + diagnostic_filter_leaf: None, + ), + ( + name: Some("use_assign_var"), + arguments: [], + result: Some(( + ty: 0, + binding: None, + )), + local_variables: [ + ( + name: Some("q"), + ty: 
0, + init: None, + ), + ], + expressions: [ + CallResult(0), + LocalVariable(0), + Load( + pointer: 1, + ), + ], + named_expressions: {}, + body: [ + Call( + function: 0, + arguments: [], + result: Some(0), + ), + Store( + pointer: 1, + value: 0, + ), + Emit(( + start: 2, + end: 3, + )), + Return( + value: Some(2), + ), + ], + diagnostic_filter_leaf: None, + ), + ( + name: Some("use_assign_let"), + arguments: [], + result: Some(( + ty: 0, + binding: None, + )), + local_variables: [], + expressions: [ + CallResult(0), + ], + named_expressions: { + 0: "q", + }, + body: [ + Call( + function: 0, + arguments: [], + result: Some(0), + ), + Return( + value: Some(0), + ), + ], + diagnostic_filter_leaf: None, + ), + ( + name: Some("use_phony_assign"), + arguments: [], + result: None, + local_variables: [], + expressions: [ + CallResult(0), + ], + named_expressions: { + 0: "phony", + }, + body: [ + Call( + function: 0, + arguments: [], + result: Some(0), + ), + Return( + value: None, + ), + ], + diagnostic_filter_leaf: None, + ), + ], + entry_points: [ + ( + name: "main", + stage: Compute, + early_depth_test: None, + workgroup_size: (1, 1, 1), + workgroup_size_overrides: None, + function: ( + name: Some("main"), + arguments: [], + result: None, + local_variables: [], + expressions: [], + named_expressions: {}, + body: [ + Return( + value: None, + ), + ], + diagnostic_filter_leaf: None, + ), + ), + ], + diagnostic_filters: [], + diagnostic_filter_leaf: None, +) \ No newline at end of file diff --git a/naga/tests/out/ir/must-use.ron b/naga/tests/out/ir/must-use.ron new file mode 100644 index 0000000000..3d51cb0c95 --- /dev/null +++ b/naga/tests/out/ir/must-use.ron @@ -0,0 +1,181 @@ +( + types: [ + ( + name: None, + inner: Scalar(( + kind: Sint, + width: 4, + )), + ), + ], + special_types: ( + ray_desc: None, + ray_intersection: None, + predeclared_types: {}, + ), + constants: [], + overrides: [], + global_variables: [], + global_expressions: [], + functions: [ + ( + name: Some("use_me"), + arguments: [], + result: Some(( + ty: 0, + binding: None, + )), + local_variables: [], + expressions: [ + Literal(I32(10)), + ], + named_expressions: {}, + body: [ + Return( + value: Some(0), + ), + ], + diagnostic_filter_leaf: None, + ), + ( + name: Some("use_return"), + arguments: [], + result: Some(( + ty: 0, + binding: None, + )), + local_variables: [], + expressions: [ + CallResult(0), + ], + named_expressions: {}, + body: [ + Call( + function: 0, + arguments: [], + result: Some(0), + ), + Return( + value: Some(0), + ), + ], + diagnostic_filter_leaf: None, + ), + ( + name: Some("use_assign_var"), + arguments: [], + result: Some(( + ty: 0, + binding: None, + )), + local_variables: [ + ( + name: Some("q"), + ty: 0, + init: None, + ), + ], + expressions: [ + CallResult(0), + LocalVariable(0), + Load( + pointer: 1, + ), + ], + named_expressions: {}, + body: [ + Call( + function: 0, + arguments: [], + result: Some(0), + ), + Store( + pointer: 1, + value: 0, + ), + Emit(( + start: 2, + end: 3, + )), + Return( + value: Some(2), + ), + ], + diagnostic_filter_leaf: None, + ), + ( + name: Some("use_assign_let"), + arguments: [], + result: Some(( + ty: 0, + binding: None, + )), + local_variables: [], + expressions: [ + CallResult(0), + ], + named_expressions: { + 0: "q", + }, + body: [ + Call( + function: 0, + arguments: [], + result: Some(0), + ), + Return( + value: Some(0), + ), + ], + diagnostic_filter_leaf: None, + ), + ( + name: Some("use_phony_assign"), + arguments: [], + result: None, + local_variables: [], + 
expressions: [ + CallResult(0), + ], + named_expressions: { + 0: "phony", + }, + body: [ + Call( + function: 0, + arguments: [], + result: Some(0), + ), + Return( + value: None, + ), + ], + diagnostic_filter_leaf: None, + ), + ], + entry_points: [ + ( + name: "main", + stage: Compute, + early_depth_test: None, + workgroup_size: (1, 1, 1), + workgroup_size_overrides: None, + function: ( + name: Some("main"), + arguments: [], + result: None, + local_variables: [], + expressions: [], + named_expressions: {}, + body: [ + Return( + value: None, + ), + ], + diagnostic_filter_leaf: None, + ), + ), + ], + diagnostic_filters: [], + diagnostic_filter_leaf: None, +) \ No newline at end of file diff --git a/naga/tests/out/ir/storage-textures.compact.ron b/naga/tests/out/ir/storage-textures.compact.ron new file mode 100644 index 0000000000..3f2f06439c --- /dev/null +++ b/naga/tests/out/ir/storage-textures.compact.ron @@ -0,0 +1,319 @@ +( + types: [ + ( + name: None, + inner: Image( + dim: D2, + arrayed: false, + class: Storage( + format: R32Float, + access: ("LOAD"), + ), + ), + ), + ( + name: None, + inner: Image( + dim: D2, + arrayed: false, + class: Storage( + format: Rg32Float, + access: ("LOAD"), + ), + ), + ), + ( + name: None, + inner: Image( + dim: D2, + arrayed: false, + class: Storage( + format: Rgba32Float, + access: ("LOAD"), + ), + ), + ), + ( + name: None, + inner: Image( + dim: D2, + arrayed: false, + class: Storage( + format: R32Float, + access: ("STORE"), + ), + ), + ), + ( + name: None, + inner: Image( + dim: D2, + arrayed: false, + class: Storage( + format: Rg32Float, + access: ("STORE"), + ), + ), + ), + ( + name: None, + inner: Image( + dim: D2, + arrayed: false, + class: Storage( + format: Rgba32Float, + access: ("STORE"), + ), + ), + ), + ], + special_types: ( + ray_desc: None, + ray_intersection: None, + predeclared_types: {}, + ), + constants: [], + overrides: [], + global_variables: [ + ( + name: Some("s_r_r"), + space: Handle, + binding: Some(( + group: 0, + binding: 0, + )), + ty: 0, + init: None, + ), + ( + name: Some("s_rg_r"), + space: Handle, + binding: Some(( + group: 0, + binding: 1, + )), + ty: 1, + init: None, + ), + ( + name: Some("s_rgba_r"), + space: Handle, + binding: Some(( + group: 0, + binding: 2, + )), + ty: 2, + init: None, + ), + ( + name: Some("s_r_w"), + space: Handle, + binding: Some(( + group: 1, + binding: 0, + )), + ty: 3, + init: None, + ), + ( + name: Some("s_rg_w"), + space: Handle, + binding: Some(( + group: 1, + binding: 1, + )), + ty: 4, + init: None, + ), + ( + name: Some("s_rgba_w"), + space: Handle, + binding: Some(( + group: 1, + binding: 2, + )), + ty: 5, + init: None, + ), + ], + global_expressions: [], + functions: [], + entry_points: [ + ( + name: "csLoad", + stage: Compute, + early_depth_test: None, + workgroup_size: (1, 1, 1), + workgroup_size_overrides: None, + function: ( + name: Some("csLoad"), + arguments: [], + result: None, + local_variables: [], + expressions: [ + GlobalVariable(0), + Literal(U32(0)), + Splat( + size: Bi, + value: 1, + ), + ImageLoad( + image: 0, + coordinate: 2, + array_index: None, + sample: None, + level: None, + ), + GlobalVariable(1), + Literal(U32(0)), + Splat( + size: Bi, + value: 5, + ), + ImageLoad( + image: 4, + coordinate: 6, + array_index: None, + sample: None, + level: None, + ), + GlobalVariable(2), + Literal(U32(0)), + Splat( + size: Bi, + value: 9, + ), + ImageLoad( + image: 8, + coordinate: 10, + array_index: None, + sample: None, + level: None, + ), + ], + named_expressions: { + 3: "phony", + 
7: "phony", + 11: "phony", + }, + body: [ + Emit(( + start: 2, + end: 4, + )), + Emit(( + start: 6, + end: 8, + )), + Emit(( + start: 10, + end: 12, + )), + Return( + value: None, + ), + ], + diagnostic_filter_leaf: None, + ), + ), + ( + name: "csStore", + stage: Compute, + early_depth_test: None, + workgroup_size: (1, 1, 1), + workgroup_size_overrides: None, + function: ( + name: Some("csStore"), + arguments: [], + result: None, + local_variables: [], + expressions: [ + GlobalVariable(3), + Literal(U32(0)), + Splat( + size: Bi, + value: 1, + ), + Literal(F32(0.0)), + Splat( + size: Quad, + value: 3, + ), + GlobalVariable(4), + Literal(U32(0)), + Splat( + size: Bi, + value: 6, + ), + Literal(F32(0.0)), + Splat( + size: Quad, + value: 8, + ), + GlobalVariable(5), + Literal(U32(0)), + Splat( + size: Bi, + value: 11, + ), + Literal(F32(0.0)), + Splat( + size: Quad, + value: 13, + ), + ], + named_expressions: {}, + body: [ + Emit(( + start: 2, + end: 3, + )), + Emit(( + start: 4, + end: 5, + )), + ImageStore( + image: 0, + coordinate: 2, + array_index: None, + value: 4, + ), + Emit(( + start: 7, + end: 8, + )), + Emit(( + start: 9, + end: 10, + )), + ImageStore( + image: 5, + coordinate: 7, + array_index: None, + value: 9, + ), + Emit(( + start: 12, + end: 13, + )), + Emit(( + start: 14, + end: 15, + )), + ImageStore( + image: 10, + coordinate: 12, + array_index: None, + value: 14, + ), + Return( + value: None, + ), + ], + diagnostic_filter_leaf: None, + ), + ), + ], + diagnostic_filters: [], + diagnostic_filter_leaf: None, +) \ No newline at end of file diff --git a/naga/tests/out/ir/storage-textures.ron b/naga/tests/out/ir/storage-textures.ron new file mode 100644 index 0000000000..3f2f06439c --- /dev/null +++ b/naga/tests/out/ir/storage-textures.ron @@ -0,0 +1,319 @@ +( + types: [ + ( + name: None, + inner: Image( + dim: D2, + arrayed: false, + class: Storage( + format: R32Float, + access: ("LOAD"), + ), + ), + ), + ( + name: None, + inner: Image( + dim: D2, + arrayed: false, + class: Storage( + format: Rg32Float, + access: ("LOAD"), + ), + ), + ), + ( + name: None, + inner: Image( + dim: D2, + arrayed: false, + class: Storage( + format: Rgba32Float, + access: ("LOAD"), + ), + ), + ), + ( + name: None, + inner: Image( + dim: D2, + arrayed: false, + class: Storage( + format: R32Float, + access: ("STORE"), + ), + ), + ), + ( + name: None, + inner: Image( + dim: D2, + arrayed: false, + class: Storage( + format: Rg32Float, + access: ("STORE"), + ), + ), + ), + ( + name: None, + inner: Image( + dim: D2, + arrayed: false, + class: Storage( + format: Rgba32Float, + access: ("STORE"), + ), + ), + ), + ], + special_types: ( + ray_desc: None, + ray_intersection: None, + predeclared_types: {}, + ), + constants: [], + overrides: [], + global_variables: [ + ( + name: Some("s_r_r"), + space: Handle, + binding: Some(( + group: 0, + binding: 0, + )), + ty: 0, + init: None, + ), + ( + name: Some("s_rg_r"), + space: Handle, + binding: Some(( + group: 0, + binding: 1, + )), + ty: 1, + init: None, + ), + ( + name: Some("s_rgba_r"), + space: Handle, + binding: Some(( + group: 0, + binding: 2, + )), + ty: 2, + init: None, + ), + ( + name: Some("s_r_w"), + space: Handle, + binding: Some(( + group: 1, + binding: 0, + )), + ty: 3, + init: None, + ), + ( + name: Some("s_rg_w"), + space: Handle, + binding: Some(( + group: 1, + binding: 1, + )), + ty: 4, + init: None, + ), + ( + name: Some("s_rgba_w"), + space: Handle, + binding: Some(( + group: 1, + binding: 2, + )), + ty: 5, + init: None, + ), + ], + 
global_expressions: [], + functions: [], + entry_points: [ + ( + name: "csLoad", + stage: Compute, + early_depth_test: None, + workgroup_size: (1, 1, 1), + workgroup_size_overrides: None, + function: ( + name: Some("csLoad"), + arguments: [], + result: None, + local_variables: [], + expressions: [ + GlobalVariable(0), + Literal(U32(0)), + Splat( + size: Bi, + value: 1, + ), + ImageLoad( + image: 0, + coordinate: 2, + array_index: None, + sample: None, + level: None, + ), + GlobalVariable(1), + Literal(U32(0)), + Splat( + size: Bi, + value: 5, + ), + ImageLoad( + image: 4, + coordinate: 6, + array_index: None, + sample: None, + level: None, + ), + GlobalVariable(2), + Literal(U32(0)), + Splat( + size: Bi, + value: 9, + ), + ImageLoad( + image: 8, + coordinate: 10, + array_index: None, + sample: None, + level: None, + ), + ], + named_expressions: { + 3: "phony", + 7: "phony", + 11: "phony", + }, + body: [ + Emit(( + start: 2, + end: 4, + )), + Emit(( + start: 6, + end: 8, + )), + Emit(( + start: 10, + end: 12, + )), + Return( + value: None, + ), + ], + diagnostic_filter_leaf: None, + ), + ), + ( + name: "csStore", + stage: Compute, + early_depth_test: None, + workgroup_size: (1, 1, 1), + workgroup_size_overrides: None, + function: ( + name: Some("csStore"), + arguments: [], + result: None, + local_variables: [], + expressions: [ + GlobalVariable(3), + Literal(U32(0)), + Splat( + size: Bi, + value: 1, + ), + Literal(F32(0.0)), + Splat( + size: Quad, + value: 3, + ), + GlobalVariable(4), + Literal(U32(0)), + Splat( + size: Bi, + value: 6, + ), + Literal(F32(0.0)), + Splat( + size: Quad, + value: 8, + ), + GlobalVariable(5), + Literal(U32(0)), + Splat( + size: Bi, + value: 11, + ), + Literal(F32(0.0)), + Splat( + size: Quad, + value: 13, + ), + ], + named_expressions: {}, + body: [ + Emit(( + start: 2, + end: 3, + )), + Emit(( + start: 4, + end: 5, + )), + ImageStore( + image: 0, + coordinate: 2, + array_index: None, + value: 4, + ), + Emit(( + start: 7, + end: 8, + )), + Emit(( + start: 9, + end: 10, + )), + ImageStore( + image: 5, + coordinate: 7, + array_index: None, + value: 9, + ), + Emit(( + start: 12, + end: 13, + )), + Emit(( + start: 14, + end: 15, + )), + ImageStore( + image: 10, + coordinate: 12, + array_index: None, + value: 14, + ), + Return( + value: None, + ), + ], + diagnostic_filter_leaf: None, + ), + ), + ], + diagnostic_filters: [], + diagnostic_filter_leaf: None, +) \ No newline at end of file diff --git a/naga/tests/out/msl/6772-unpack-expr-accesses.msl b/naga/tests/out/msl/6772-unpack-expr-accesses.msl index e00a1b4c54..0097ef6145 100644 --- a/naga/tests/out/msl/6772-unpack-expr-accesses.msl +++ b/naga/tests/out/msl/6772-unpack-expr-accesses.msl @@ -9,4 +9,5 @@ kernel void main_( ) { int phony = (int4(12u, 12u >> 8, 12u >> 16, 12u >> 24) << 24 >> 24)[2]; uint phony_1 = (uint4(12u, 12u >> 8, 12u >> 16, 12u >> 24) << 24 >> 24).y; + return; } diff --git a/naga/tests/out/msl/abstract-types-operators.msl b/naga/tests/out/msl/abstract-types-operators.msl index cec86a53dc..6bb3ba4380 100644 --- a/naga/tests/out/msl/abstract-types-operators.msl +++ b/naga/tests/out/msl/abstract-types-operators.msl @@ -73,12 +73,12 @@ void runtime_values( float _e27 = f; plus_f_f_f = _e26 + _e27; int _e31 = i; - plus_iai_i = 1 + _e31; + plus_iai_i = as_type(as_type(1) + as_type(_e31)); int _e35 = i; - plus_i_iai = _e35 + 2; + plus_i_iai = as_type(as_type(_e35) + as_type(2)); int _e39 = i; int _e40 = i; - plus_i_i_i = _e39 + _e40; + plus_i_i_i = as_type(as_type(_e39) + as_type(_e40)); uint _e44 = u; 
plus_uai_u = 1u + _e44; uint _e48 = u; @@ -97,5 +97,6 @@ void wgpu_4445_( void wgpu_4435_( threadgroup type_3& a ) { - uint y = a.inner[1 - 1]; + uint y = a.inner[as_type(as_type(1) - as_type(1))]; + return; } diff --git a/naga/tests/out/msl/abstract-types-return.msl b/naga/tests/out/msl/abstract-types-return.msl new file mode 100644 index 0000000000..99072e7473 --- /dev/null +++ b/naga/tests/out/msl/abstract-types-return.msl @@ -0,0 +1,44 @@ +// language: metal1.0 +#include +#include + +using metal::uint; + +struct type_4 { + float inner[4]; +}; + +int return_i32_ai( +) { + return 1; +} + +uint return_u32_ai( +) { + return 1u; +} + +float return_f32_ai( +) { + return 1.0; +} + +float return_f32_af( +) { + return 1.0; +} + +metal::float2 return_vec2f32_ai( +) { + return metal::float2(1.0); +} + +type_4 return_arrf32_ai( +) { + return type_4 {1.0, 1.0, 1.0, 1.0}; +} + +kernel void main_( +) { + return; +} diff --git a/naga/tests/out/msl/access.msl b/naga/tests/out/msl/access.msl index 2103a8d099..eb0808f80e 100644 --- a/naga/tests/out/msl/access.msl +++ b/naga/tests/out/msl/access.msl @@ -63,6 +63,19 @@ struct AssignToMember { struct type_25 { uint inner[4]; }; +struct type_28 { + bool inner[1]; +}; +struct S { + int m; +}; +struct Inner { + int delicious; +}; +struct Outer { + Inner om_nom_nom; + uint thing; +}; void test_matrix_within_struct_accesses( constant Baz& baz @@ -70,7 +83,7 @@ void test_matrix_within_struct_accesses( int idx = 1; Baz t = Baz {metal::float3x2(metal::float2(1.0), metal::float2(2.0), metal::float2(3.0))}; int _e3 = idx; - idx = _e3 - 1; + idx = as_type(as_type(_e3) - as_type(1)); metal::float3x2 l0_ = baz.m; metal::float2 l1_ = baz.m[0]; int _e14 = idx; @@ -84,7 +97,7 @@ void test_matrix_within_struct_accesses( int _e38 = idx; float l6_ = baz.m[_e36][_e38]; int _e51 = idx; - idx = _e51 + 1; + idx = as_type(as_type(_e51) + as_type(1)); t.m = metal::float3x2(metal::float2(6.0), metal::float2(5.0), metal::float2(4.0)); t.m[0] = metal::float2(9.0); int _e66 = idx; @@ -106,7 +119,7 @@ void test_matrix_within_array_within_struct_accesses( int idx_1 = 1; MatCx2InArray t_1 = MatCx2InArray {type_15 {}}; int _e3 = idx_1; - idx_1 = _e3 - 1; + idx_1 = as_type(as_type(_e3) - as_type(1)); type_15 l0_1 = nested_mat_cx2_.am; metal::float4x2 l1_1 = nested_mat_cx2_.am.inner[0]; metal::float2 l2_1 = nested_mat_cx2_.am.inner[0][0]; @@ -121,7 +134,7 @@ void test_matrix_within_array_within_struct_accesses( int _e48 = idx_1; float l7_ = nested_mat_cx2_.am.inner[0][_e46][_e48]; int _e55 = idx_1; - idx_1 = _e55 + 1; + idx_1 = as_type(as_type(_e55) + as_type(1)); t_1.am = type_15 {}; t_1.am.inner[0] = metal::float4x2(metal::float2(8.0), metal::float2(7.0), metal::float2(6.0), metal::float2(5.0)); t_1.am.inner[0][0] = metal::float2(9.0); @@ -193,6 +206,48 @@ void assign_to_arg_ptr_array_element( return; } +bool index_ptr( + bool value +) { + type_28 a_1 = {}; + a_1 = type_28 {value}; + bool _e4 = a_1.inner[0]; + return _e4; +} + +int member_ptr( +) { + S s = S {42}; + int _e4 = s.m; + return _e4; +} + +int let_members_of_members( +) { + Inner inner_1 = Outer {}.om_nom_nom; + int delishus_1 = Outer {}.om_nom_nom.delicious; + if (Outer {}.thing != static_cast(delishus_1)) { + } + return Outer {}.om_nom_nom.delicious; +} + +int var_members_of_members( +) { + Outer thing = Outer {}; + Inner inner = {}; + int delishus = {}; + Inner _e3 = thing.om_nom_nom; + inner = _e3; + int _e6 = inner.delicious; + delishus = _e6; + uint _e9 = thing.thing; + int _e10 = delishus; + if (_e9 != static_cast(_e10)) 
{ + } + int _e15 = thing.om_nom_nom.delicious; + return _e15; +} + struct foo_vertInput { }; struct foo_vertOutput { @@ -215,14 +270,14 @@ vertex foo_vertOutput foo_vert( metal::float4x3 _matrix = bar._matrix; type_10 arr_1 = bar.arr; float b = bar._matrix[3u].x; - int a_1 = bar.data[(1 + (_buffer_sizes.size1 - 160 - 8) / 8) - 2u].value; + int a_2 = bar.data[(1 + (_buffer_sizes.size1 - 160 - 8) / 8) - 2u].value; metal::int2 c = qux; float _e33 = read_from_private(foo); - c2_ = type_20 {a_1, static_cast(b), 3, 4, 5}; + c2_ = type_20 {a_2, static_cast(b), 3, 4, 5}; c2_.inner[vi + 1u] = 42; - int value = c2_.inner[vi]; + int value_1 = c2_.inner[vi]; float _e47 = test_arr_as_arg(type_18 {}); - return foo_vertOutput { metal::float4(_matrix * static_cast(metal::int4(value)), 2.0) }; + return foo_vertOutput { metal::float4(_matrix * static_cast(metal::int4(value_1)), 2.0) }; } diff --git a/naga/tests/out/msl/array-in-ctor.msl b/naga/tests/out/msl/array-in-ctor.msl index a3bbb2057c..776a7b4bc6 100644 --- a/naga/tests/out/msl/array-in-ctor.msl +++ b/naga/tests/out/msl/array-in-ctor.msl @@ -15,4 +15,5 @@ kernel void cs_main( device Ah const& ah [[user(fake0)]] ) { Ah ah_1 = ah; + return; } diff --git a/naga/tests/out/msl/array-in-function-return-type.msl b/naga/tests/out/msl/array-in-function-return-type.msl index 77399f6424..71e6862e91 100644 --- a/naga/tests/out/msl/array-in-function-return-type.msl +++ b/naga/tests/out/msl/array-in-function-return-type.msl @@ -7,17 +7,28 @@ using metal::uint; struct type_1 { float inner[2]; }; +struct type_2 { + type_1 inner[3]; +}; type_1 ret_array( ) { return type_1 {1.0, 2.0}; } +type_2 ret_array_array( +) { + type_1 _e0 = ret_array(); + type_1 _e1 = ret_array(); + type_1 _e2 = ret_array(); + return type_2 {_e0, _e1, _e2}; +} + struct main_Output { metal::float4 member [[color(0)]]; }; fragment main_Output main_( ) { - type_1 _e0 = ret_array(); - return main_Output { metal::float4(_e0.inner[0], _e0.inner[1], 0.0, 1.0) }; + type_2 _e0 = ret_array_array(); + return main_Output { metal::float4(_e0.inner[0].inner[0], _e0.inner[0].inner[1], 0.0, 1.0) }; } diff --git a/naga/tests/out/msl/atomicCompareExchange.msl b/naga/tests/out/msl/atomicCompareExchange.msl index 6655fad7e2..633f3226e0 100644 --- a/naga/tests/out/msl/atomicCompareExchange.msl +++ b/naga/tests/out/msl/atomicCompareExchange.msl @@ -76,8 +76,11 @@ kernel void test_atomic_compare_exchange_i32_( uint i = 0u; int old = {}; bool exchanged = {}; + uint2 loop_bound = uint2(0u); bool loop_init = true; while(true) { + if (metal::all(loop_bound == uint2(4294967295u))) { break; } + loop_bound += uint2(loop_bound.y == 4294967295u, 1u); if (!loop_init) { uint _e27 = i; i = _e27 + 1u; @@ -93,7 +96,10 @@ kernel void test_atomic_compare_exchange_i32_( int _e8 = metal::atomic_load_explicit(&arr_i32_.inner[_e6], metal::memory_order_relaxed); old = _e8; exchanged = false; + uint2 loop_bound_1 = uint2(0u); while(true) { + if (metal::all(loop_bound_1 == uint2(4294967295u))) { break; } + loop_bound_1 += uint2(loop_bound_1.y == 4294967295u, 1u); bool _e12 = exchanged; if (!(_e12)) { } else { @@ -108,11 +114,8 @@ kernel void test_atomic_compare_exchange_i32_( old = _e23.old_value; exchanged = _e23.exchanged; } -#define LOOP_IS_BOUNDED { volatile bool unpredictable_break_from_loop = false; if (unpredictable_break_from_loop) break; } - LOOP_IS_BOUNDED } } - LOOP_IS_BOUNDED } return; } @@ -124,8 +127,11 @@ kernel void test_atomic_compare_exchange_u32_( uint i_1 = 0u; uint old_1 = {}; bool exchanged_1 = {}; + uint2 
loop_bound_2 = uint2(0u); bool loop_init_1 = true; while(true) { + if (metal::all(loop_bound_2 == uint2(4294967295u))) { break; } + loop_bound_2 += uint2(loop_bound_2.y == 4294967295u, 1u); if (!loop_init_1) { uint _e27 = i_1; i_1 = _e27 + 1u; @@ -141,7 +147,10 @@ kernel void test_atomic_compare_exchange_u32_( uint _e8 = metal::atomic_load_explicit(&arr_u32_.inner[_e6], metal::memory_order_relaxed); old_1 = _e8; exchanged_1 = false; + uint2 loop_bound_3 = uint2(0u); while(true) { + if (metal::all(loop_bound_3 == uint2(4294967295u))) { break; } + loop_bound_3 += uint2(loop_bound_3.y == 4294967295u, 1u); bool _e12 = exchanged_1; if (!(_e12)) { } else { @@ -156,10 +165,8 @@ kernel void test_atomic_compare_exchange_u32_( old_1 = _e23.old_value; exchanged_1 = _e23.exchanged; } - LOOP_IS_BOUNDED } } - LOOP_IS_BOUNDED } return; } diff --git a/naga/tests/out/msl/boids.msl b/naga/tests/out/msl/boids.msl index 07acd7cf62..f278d3ad68 100644 --- a/naga/tests/out/msl/boids.msl +++ b/naga/tests/out/msl/boids.msl @@ -55,8 +55,11 @@ kernel void main_( vPos = _e8; metal::float2 _e14 = particlesSrc.particles[index].vel; vVel = _e14; + uint2 loop_bound = uint2(0u); bool loop_init = true; while(true) { + if (metal::all(loop_bound == uint2(4294967295u))) { break; } + loop_bound += uint2(loop_bound.y == 4294967295u, 1u); if (!loop_init) { uint _e91 = i; i = _e91 + 1u; @@ -84,7 +87,7 @@ kernel void main_( metal::float2 _e61 = pos; cMass = _e60 + _e61; int _e63 = cMassCount; - cMassCount = _e63 + 1; + cMassCount = as_type(as_type(_e63) + as_type(1)); } metal::float2 _e66 = pos; metal::float2 _e67 = vPos; @@ -103,10 +106,8 @@ kernel void main_( metal::float2 _e86 = vel; cVel = _e85 + _e86; int _e88 = cVelCount; - cVelCount = _e88 + 1; + cVelCount = as_type(as_type(_e88) + as_type(1)); } -#define LOOP_IS_BOUNDED { volatile bool unpredictable_break_from_loop = false; if (unpredictable_break_from_loop) break; } - LOOP_IS_BOUNDED } int _e94 = cMassCount; if (_e94 > 0) { diff --git a/naga/tests/out/msl/break-if.msl b/naga/tests/out/msl/break-if.msl index 4d3397234b..0336ac1b4c 100644 --- a/naga/tests/out/msl/break-if.msl +++ b/naga/tests/out/msl/break-if.msl @@ -7,16 +7,17 @@ using metal::uint; void breakIfEmpty( ) { + uint2 loop_bound = uint2(0u); bool loop_init = true; while(true) { + if (metal::all(loop_bound == uint2(4294967295u))) { break; } + loop_bound += uint2(loop_bound.y == 4294967295u, 1u); if (!loop_init) { if (true) { break; } } loop_init = false; -#define LOOP_IS_BOUNDED { volatile bool unpredictable_break_from_loop = false; if (unpredictable_break_from_loop) break; } - LOOP_IS_BOUNDED } return; } @@ -26,8 +27,11 @@ void breakIfEmptyBody( ) { bool b = {}; bool c = {}; + uint2 loop_bound_1 = uint2(0u); bool loop_init_1 = true; while(true) { + if (metal::all(loop_bound_1 == uint2(4294967295u))) { break; } + loop_bound_1 += uint2(loop_bound_1.y == 4294967295u, 1u); if (!loop_init_1) { b = a; bool _e2 = b; @@ -38,7 +42,6 @@ void breakIfEmptyBody( } } loop_init_1 = false; - LOOP_IS_BOUNDED } return; } @@ -48,8 +51,11 @@ void breakIf( ) { bool d = {}; bool e = {}; + uint2 loop_bound_2 = uint2(0u); bool loop_init_2 = true; while(true) { + if (metal::all(loop_bound_2 == uint2(4294967295u))) { break; } + loop_bound_2 += uint2(loop_bound_2.y == 4294967295u, 1u); if (!loop_init_2) { bool _e5 = e; if (a_1 == e) { @@ -60,7 +66,6 @@ void breakIf( d = a_1; bool _e2 = d; e = a_1 != _e2; - LOOP_IS_BOUNDED } return; } @@ -68,8 +73,11 @@ void breakIf( void breakIfSeparateVariable( ) { uint counter = 0u; + uint2 loop_bound_3 
= uint2(0u); bool loop_init_3 = true; while(true) { + if (metal::all(loop_bound_3 == uint2(4294967295u))) { break; } + loop_bound_3 += uint2(loop_bound_3.y == 4294967295u, 1u); if (!loop_init_3) { uint _e5 = counter; if (counter == 5u) { @@ -79,7 +87,6 @@ void breakIfSeparateVariable( loop_init_3 = false; uint _e3 = counter; counter = _e3 + 1u; - LOOP_IS_BOUNDED } return; } diff --git a/naga/tests/out/msl/collatz.msl b/naga/tests/out/msl/collatz.msl index e282d13abb..eef2c83c34 100644 --- a/naga/tests/out/msl/collatz.msl +++ b/naga/tests/out/msl/collatz.msl @@ -19,7 +19,10 @@ uint collatz_iterations( uint n = {}; uint i = 0u; n = n_base; + uint2 loop_bound = uint2(0u); while(true) { + if (metal::all(loop_bound == uint2(4294967295u))) { break; } + loop_bound += uint2(loop_bound.y == 4294967295u, 1u); uint _e4 = n; if (_e4 > 1u) { } else { @@ -37,8 +40,6 @@ uint collatz_iterations( uint _e20 = i; i = _e20 + 1u; } -#define LOOP_IS_BOUNDED { volatile bool unpredictable_break_from_loop = false; if (unpredictable_break_from_loop) break; } - LOOP_IS_BOUNDED } uint _e23 = i; return _e23; diff --git a/naga/tests/out/msl/const-exprs.msl b/naga/tests/out/msl/const-exprs.msl index 7798ae62b3..cb0959f72e 100644 --- a/naga/tests/out/msl/const-exprs.msl +++ b/naga/tests/out/msl/const-exprs.msl @@ -22,16 +22,19 @@ constant metal::bool2 compare_vec = metal::bool2(true, false); void swizzle_of_compose( ) { metal::int4 out = metal::int4(4, 3, 2, 1); + return; } void index_of_compose( ) { int out_1 = 2; + return; } void compose_three_deep( ) { int out_2 = 6; + return; } void non_constant_initializers( @@ -56,16 +59,19 @@ void non_constant_initializers( void splat_of_constant( ) { metal::int4 out_4 = metal::int4(-4, -4, -4, -4); + return; } void compose_of_constant( ) { metal::int4 out_5 = metal::int4(-4, -4, -4, -4); + return; } void compose_of_splat( ) { metal::float4 x_1 = metal::float4(2.0, 1.0, 1.0, 1.0); + return; } uint map_texture_kind( diff --git a/naga/tests/out/msl/constructors.msl b/naga/tests/out/msl/constructors.msl index d4dc5c5292..1084d8e8aa 100644 --- a/naga/tests/out/msl/constructors.msl +++ b/naga/tests/out/msl/constructors.msl @@ -41,4 +41,5 @@ kernel void main_( type_11 cit2_ = type_11 {0, 1, 2, 3}; metal::uint2 ic4_ = metal::uint2(0u, 0u); metal::float2x3 ic5_ = metal::float2x3(metal::float3(0.0, 0.0, 0.0), metal::float3(0.0, 0.0, 0.0)); + return; } diff --git a/naga/tests/out/msl/control-flow.msl b/naga/tests/out/msl/control-flow.msl index 1b35249f36..3c73656af7 100644 --- a/naga/tests/out/msl/control-flow.msl +++ b/naga/tests/out/msl/control-flow.msl @@ -31,7 +31,10 @@ void switch_case_break( void loop_switch_continue( int x ) { + uint2 loop_bound = uint2(0u); while(true) { + if (metal::all(loop_bound == uint2(4294967295u))) { break; } + loop_bound += uint2(loop_bound.y == 4294967295u, 1u); switch(x) { case 1: { continue; @@ -40,8 +43,6 @@ void loop_switch_continue( break; } } -#define LOOP_IS_BOUNDED { volatile bool unpredictable_break_from_loop = false; if (unpredictable_break_from_loop) break; } - LOOP_IS_BOUNDED } return; } @@ -51,7 +52,10 @@ void loop_switch_continue_nesting( int y, int z ) { + uint2 loop_bound_1 = uint2(0u); while(true) { + if (metal::all(loop_bound_1 == uint2(4294967295u))) { break; } + loop_bound_1 += uint2(loop_bound_1.y == 4294967295u, 1u); switch(x_1) { case 1: { continue; @@ -62,7 +66,10 @@ void loop_switch_continue_nesting( continue; } default: { + uint2 loop_bound_2 = uint2(0u); while(true) { + if (metal::all(loop_bound_2 == uint2(4294967295u))) { break; 
} + loop_bound_2 += uint2(loop_bound_2.y == 4294967295u, 1u); switch(z) { case 1: { continue; @@ -71,7 +78,6 @@ void loop_switch_continue_nesting( break; } } - LOOP_IS_BOUNDED } break; } @@ -87,9 +93,11 @@ void loop_switch_continue_nesting( continue; } } - LOOP_IS_BOUNDED } + uint2 loop_bound_3 = uint2(0u); while(true) { + if (metal::all(loop_bound_3 == uint2(4294967295u))) { break; } + loop_bound_3 += uint2(loop_bound_3.y == 4294967295u, 1u); switch(y) { case 1: default: { @@ -101,7 +109,6 @@ void loop_switch_continue_nesting( break; } } - LOOP_IS_BOUNDED } return; } @@ -113,7 +120,10 @@ void loop_switch_omit_continue_variable_checks( int w ) { int pos_1 = 0; + uint2 loop_bound_4 = uint2(0u); while(true) { + if (metal::all(loop_bound_4 == uint2(4294967295u))) { break; } + loop_bound_4 += uint2(loop_bound_4.y == 4294967295u, 1u); switch(x_2) { case 1: { pos_1 = 1; @@ -123,9 +133,11 @@ void loop_switch_omit_continue_variable_checks( break; } } - LOOP_IS_BOUNDED } + uint2 loop_bound_5 = uint2(0u); while(true) { + if (metal::all(loop_bound_5 == uint2(4294967295u))) { break; } + loop_bound_5 += uint2(loop_bound_5.y == 4294967295u, 1u); switch(x_2) { case 1: { break; @@ -154,7 +166,6 @@ void loop_switch_omit_continue_variable_checks( break; } } - LOOP_IS_BOUNDED } return; } diff --git a/naga/tests/out/msl/cross.msl b/naga/tests/out/msl/cross.msl index 70095cd6e9..a402e73e32 100644 --- a/naga/tests/out/msl/cross.msl +++ b/naga/tests/out/msl/cross.msl @@ -8,4 +8,5 @@ using metal::uint; kernel void main_( ) { metal::float3 a = metal::cross(metal::float3(0.0, 1.0, 2.0), metal::float3(0.0, 1.0, 2.0)); + return; } diff --git a/naga/tests/out/msl/do-while.msl b/naga/tests/out/msl/do-while.msl index 2a883304d1..af55bfcc56 100644 --- a/naga/tests/out/msl/do-while.msl +++ b/naga/tests/out/msl/do-while.msl @@ -8,8 +8,11 @@ using metal::uint; void fb1_( thread bool& cond ) { + uint2 loop_bound = uint2(0u); bool loop_init = true; while(true) { + if (metal::all(loop_bound == uint2(4294967295u))) { break; } + loop_bound += uint2(loop_bound.y == 4294967295u, 1u); if (!loop_init) { bool _e1 = cond; if (!(cond)) { @@ -18,8 +21,6 @@ void fb1_( } loop_init = false; continue; -#define LOOP_IS_BOUNDED { volatile bool unpredictable_break_from_loop = false; if (unpredictable_break_from_loop) break; } - LOOP_IS_BOUNDED } return; } diff --git a/naga/tests/out/msl/empty-global-name.msl b/naga/tests/out/msl/empty-global-name.msl index 01cac3f6e0..5db7fdd932 100644 --- a/naga/tests/out/msl/empty-global-name.msl +++ b/naga/tests/out/msl/empty-global-name.msl @@ -12,7 +12,7 @@ void function( device type_1& unnamed ) { int _e3 = unnamed.member; - unnamed.member = _e3 + 1; + unnamed.member = as_type(as_type(_e3) + as_type(1)); return; } diff --git a/naga/tests/out/msl/globals.msl b/naga/tests/out/msl/globals.msl index d2ed89ed46..b976cc2018 100644 --- a/naga/tests/out/msl/globals.msl +++ b/naga/tests/out/msl/globals.msl @@ -56,6 +56,7 @@ void test_msl_packed_vec3_( metal::float3 mvm1_ = metal::float3x3 {} * metal::float3(data.v3_); metal::float3 svm0_ = data.v3_ * 2.0; metal::float3 svm1_ = 2.0 * data.v3_; + return; } kernel void main_( diff --git a/naga/tests/out/msl/image.msl b/naga/tests/out/msl/image.msl index 114ed36553..1a8567a0ca 100644 --- a/naga/tests/out/msl/image.msl +++ b/naga/tests/out/msl/image.msl @@ -21,14 +21,14 @@ kernel void main_( metal::uint4 value1_ = image_mipmapped_src.read(metal::uint2(itc), static_cast(local_id.z)); metal::uint4 value2_ = image_multisampled_src.read(metal::uint2(itc), 
static_cast(local_id.z)); metal::uint4 value4_ = image_storage_src.read(metal::uint2(itc)); - metal::uint4 value5_ = image_array_src.read(metal::uint2(itc), local_id.z, static_cast(local_id.z) + 1); - metal::uint4 value6_ = image_array_src.read(metal::uint2(itc), static_cast(local_id.z), static_cast(local_id.z) + 1); + metal::uint4 value5_ = image_array_src.read(metal::uint2(itc), local_id.z, as_type(as_type(static_cast(local_id.z)) + as_type(1))); + metal::uint4 value6_ = image_array_src.read(metal::uint2(itc), static_cast(local_id.z), as_type(as_type(static_cast(local_id.z)) + as_type(1))); metal::uint4 value7_ = image_1d_src.read(uint(static_cast(local_id.x))); metal::uint4 value1u = image_mipmapped_src.read(metal::uint2(static_cast(itc)), static_cast(local_id.z)); metal::uint4 value2u = image_multisampled_src.read(metal::uint2(static_cast(itc)), static_cast(local_id.z)); metal::uint4 value4u = image_storage_src.read(metal::uint2(static_cast(itc))); - metal::uint4 value5u = image_array_src.read(metal::uint2(static_cast(itc)), local_id.z, static_cast(local_id.z) + 1); - metal::uint4 value6u = image_array_src.read(metal::uint2(static_cast(itc)), static_cast(local_id.z), static_cast(local_id.z) + 1); + metal::uint4 value5u = image_array_src.read(metal::uint2(static_cast(itc)), local_id.z, as_type(as_type(static_cast(local_id.z)) + as_type(1))); + metal::uint4 value6u = image_array_src.read(metal::uint2(static_cast(itc)), static_cast(local_id.z), as_type(as_type(static_cast(local_id.z)) + as_type(1))); metal::uint4 value7u = image_1d_src.read(uint(static_cast(local_id.x))); image_dst.write((((value1_ + value2_) + value4_) + value5_) + value6_, uint(itc.x)); image_dst.write((((value1u + value2u) + value4u) + value5u) + value6u, uint(static_cast(itc.x))); diff --git a/naga/tests/out/msl/int64.msl b/naga/tests/out/msl/int64.msl index 79a3c06889..369eb01c6f 100644 --- a/naga/tests/out/msl/int64.msl +++ b/naga/tests/out/msl/int64.msl @@ -44,76 +44,76 @@ long int64_function( ) { long val = 20L; long _e8 = val; - val = _e8 + ((31L - 1002003004005006L) + -9223372036854775807L); + val = as_type(as_type(_e8) + as_type(as_type(as_type(as_type(as_type(31L) - as_type(1002003004005006L))) + as_type(-9223372036854775807L)))); long _e10 = val; long _e13 = val; - val = _e13 + (_e10 + 5L); + val = as_type(as_type(_e13) + as_type(as_type(as_type(_e10) + as_type(5L)))); uint _e17 = input_uniform.val_u32_; long _e18 = val; long _e22 = val; - val = _e22 + static_cast(_e17 + static_cast(_e18)); + val = as_type(as_type(_e22) + as_type(static_cast(_e17 + static_cast(_e18)))); int _e26 = input_uniform.val_i32_; long _e27 = val; long _e31 = val; - val = _e31 + static_cast(_e26 + static_cast(_e27)); + val = as_type(as_type(_e31) + as_type(static_cast(as_type(as_type(_e26) + as_type(static_cast(_e27)))))); float _e35 = input_uniform.val_f32_; long _e36 = val; long _e40 = val; - val = _e40 + static_cast(_e35 + static_cast(_e36)); + val = as_type(as_type(_e40) + as_type(static_cast(_e35 + static_cast(_e36)))); long _e44 = input_uniform.val_i64_; long _e47 = val; - val = _e47 + metal::long3(_e44).z; + val = as_type(as_type(_e47) + as_type(metal::long3(_e44).z)); ulong _e51 = input_uniform.val_u64_; long _e53 = val; - val = _e53 + as_type(_e51); + val = as_type(as_type(_e53) + as_type(as_type(_e51))); metal::ulong2 _e57 = input_uniform.val_u64_2_; long _e60 = val; - val = _e60 + as_type(_e57).y; + val = as_type(as_type(_e60) + as_type(as_type(_e57).y)); metal::ulong3 _e64 = input_uniform.val_u64_3_; long _e67 = val; - val 
= _e67 + as_type(_e64).z; + val = as_type(as_type(_e67) + as_type(as_type(_e64).z)); metal::ulong4 _e71 = input_uniform.val_u64_4_; long _e74 = val; - val = _e74 + as_type(_e71).w; + val = as_type(as_type(_e74) + as_type(as_type(_e71).w)); long _e80 = input_uniform.val_i64_; long _e83 = input_storage.val_i64_; - output.val_i64_ = _e80 + _e83; + output.val_i64_ = as_type(as_type(_e80) + as_type(_e83)); metal::long2 _e89 = input_uniform.val_i64_2_; metal::long2 _e92 = input_storage.val_i64_2_; - output.val_i64_2_ = _e89 + _e92; + output.val_i64_2_ = as_type(as_type(_e89) + as_type(_e92)); metal::long3 _e98 = input_uniform.val_i64_3_; metal::long3 _e101 = input_storage.val_i64_3_; - output.val_i64_3_ = _e98 + _e101; + output.val_i64_3_ = as_type(as_type(_e98) + as_type(_e101)); metal::long4 _e107 = input_uniform.val_i64_4_; metal::long4 _e110 = input_storage.val_i64_4_; - output.val_i64_4_ = _e107 + _e110; + output.val_i64_4_ = as_type(as_type(_e107) + as_type(_e110)); type_12 _e116 = input_arrays.val_i64_array_2_; output_arrays.val_i64_array_2_ = _e116; long _e117 = val; long _e119 = val; - val = _e119 + metal::abs(_e117); + val = as_type(as_type(_e119) + as_type(metal::abs(_e117))); long _e121 = val; long _e122 = val; long _e123 = val; long _e125 = val; - val = _e125 + metal::clamp(_e121, _e122, _e123); + val = as_type(as_type(_e125) + as_type(metal::clamp(_e121, _e122, _e123))); long _e127 = val; metal::long2 _e128 = metal::long2(_e127); long _e129 = val; metal::long2 _e130 = metal::long2(_e129); long _e132 = val; - val = _e132 + ( + _e128.x * _e130.x + _e128.y * _e130.y); + val = as_type(as_type(_e132) + as_type(( + _e128.x * _e130.x + _e128.y * _e130.y))); long _e134 = val; long _e135 = val; long _e137 = val; - val = _e137 + metal::max(_e134, _e135); + val = as_type(as_type(_e137) + as_type(metal::max(_e134, _e135))); long _e139 = val; long _e140 = val; long _e142 = val; - val = _e142 + metal::min(_e139, _e140); + val = as_type(as_type(_e142) + as_type(metal::min(_e139, _e140))); long _e144 = val; long _e146 = val; - val = _e146 + metal::select(metal::select(-1, 1, (_e144 > 0)), 0, (_e144 == 0)); + val = as_type(as_type(_e146) + as_type(metal::select(metal::select(long(-1), long(1), (_e144 > 0)), long(0), (_e144 == 0)))); long _e148 = val; return _e148; } @@ -139,7 +139,7 @@ ulong uint64_function( int _e26 = input_uniform.val_i32_; ulong _e27 = val_1; ulong _e31 = val_1; - val_1 = _e31 + static_cast(_e26 + static_cast(_e27)); + val_1 = _e31 + static_cast(as_type(as_type(_e26) + as_type(static_cast(_e27)))); float _e35 = input_uniform.val_f32_; ulong _e36 = val_1; ulong _e40 = val_1; diff --git a/naga/tests/out/msl/math-functions.msl b/naga/tests/out/msl/math-functions.msl index f3dbe0a20d..97f3ea7317 100644 --- a/naga/tests/out/msl/math-functions.msl +++ b/naga/tests/out/msl/math-functions.msl @@ -93,4 +93,5 @@ fragment void main_( metal::float2 quantizeToF16_b = metal::float2(metal::half2(metal::float2(1.0, 1.0))); metal::float3 quantizeToF16_c = metal::float3(metal::half3(metal::float3(1.0, 1.0, 1.0))); metal::float4 quantizeToF16_d = metal::float4(metal::half4(metal::float4(1.0, 1.0, 1.0, 1.0))); + return; } diff --git a/naga/tests/out/msl/operators.msl b/naga/tests/out/msl/operators.msl index 85fba28c33..9b9b916cb2 100644 --- a/naga/tests/out/msl/operators.msl +++ b/naga/tests/out/msl/operators.msl @@ -19,7 +19,7 @@ metal::float4 builtins( float b1_ = as_type(1); metal::float4 b2_ = as_type(v_i32_one); metal::int4 v_i32_zero = metal::int4(0, 0, 0, 0); - return 
((((static_cast(metal::int4(s1_) + v_i32_zero) + s2_) + m1_) + m2_) + metal::float4(b1_)) + b2_; + return ((((static_cast(as_type(as_type(metal::int4(s1_)) + as_type(v_i32_zero))) + s2_) + m1_) + m2_) + metal::float4(b1_)) + b2_; } metal::float4 splat( @@ -61,6 +61,7 @@ void logical( metal::bool3 bitwise_or1_ = metal::bool3(true) | metal::bool3(false); bool bitwise_and0_ = true & false; metal::bool4 bitwise_and1_ = metal::bool4(true) & metal::bool4(false); + return; } void arithmetic( @@ -68,22 +69,22 @@ void arithmetic( float neg0_1 = -(1.0); metal::int2 neg1_1 = -(metal::int2(1)); metal::float2 neg2_ = -(metal::float2(1.0)); - int add0_ = 2 + 1; + int add0_ = as_type(as_type(2) + as_type(1)); uint add1_ = 2u + 1u; float add2_ = 2.0 + 1.0; - metal::int2 add3_ = metal::int2(2) + metal::int2(1); + metal::int2 add3_ = as_type(as_type(metal::int2(2)) + as_type(metal::int2(1))); metal::uint3 add4_ = metal::uint3(2u) + metal::uint3(1u); metal::float4 add5_ = metal::float4(2.0) + metal::float4(1.0); - int sub0_ = 2 - 1; + int sub0_ = as_type(as_type(2) - as_type(1)); uint sub1_ = 2u - 1u; float sub2_ = 2.0 - 1.0; - metal::int2 sub3_ = metal::int2(2) - metal::int2(1); + metal::int2 sub3_ = as_type(as_type(metal::int2(2)) - as_type(metal::int2(1))); metal::uint3 sub4_ = metal::uint3(2u) - metal::uint3(1u); metal::float4 sub5_ = metal::float4(2.0) - metal::float4(1.0); - int mul0_ = 2 * 1; + int mul0_ = as_type(as_type(2) * as_type(1)); uint mul1_ = 2u * 1u; float mul2_ = 2.0 * 1.0; - metal::int2 mul3_ = metal::int2(2) * metal::int2(1); + metal::int2 mul3_ = as_type(as_type(metal::int2(2)) * as_type(metal::int2(1))); metal::uint3 mul4_ = metal::uint3(2u) * metal::uint3(1u); metal::float4 mul5_ = metal::float4(2.0) * metal::float4(1.0); int div0_ = 2 / 1; @@ -99,20 +100,20 @@ void arithmetic( metal::uint3 rem4_ = metal::uint3(2u) % metal::uint3(1u); metal::float4 rem5_ = metal::fmod(metal::float4(2.0), metal::float4(1.0)); { - metal::int2 add0_1 = metal::int2(2) + metal::int2(1); - metal::int2 add1_1 = metal::int2(2) + metal::int2(1); + metal::int2 add0_1 = as_type(as_type(metal::int2(2)) + as_type(metal::int2(1))); + metal::int2 add1_1 = as_type(as_type(metal::int2(2)) + as_type(metal::int2(1))); metal::uint2 add2_1 = metal::uint2(2u) + metal::uint2(1u); metal::uint2 add3_1 = metal::uint2(2u) + metal::uint2(1u); metal::float2 add4_1 = metal::float2(2.0) + metal::float2(1.0); metal::float2 add5_1 = metal::float2(2.0) + metal::float2(1.0); - metal::int2 sub0_1 = metal::int2(2) - metal::int2(1); - metal::int2 sub1_1 = metal::int2(2) - metal::int2(1); + metal::int2 sub0_1 = as_type(as_type(metal::int2(2)) - as_type(metal::int2(1))); + metal::int2 sub1_1 = as_type(as_type(metal::int2(2)) - as_type(metal::int2(1))); metal::uint2 sub2_1 = metal::uint2(2u) - metal::uint2(1u); metal::uint2 sub3_1 = metal::uint2(2u) - metal::uint2(1u); metal::float2 sub4_1 = metal::float2(2.0) - metal::float2(1.0); metal::float2 sub5_1 = metal::float2(2.0) - metal::float2(1.0); - metal::int2 mul0_1 = metal::int2(2) * 1; - metal::int2 mul1_1 = 2 * metal::int2(1); + metal::int2 mul0_1 = as_type(as_type(metal::int2(2)) * as_type(1)); + metal::int2 mul1_1 = as_type(as_type(2) * as_type(metal::int2(1))); metal::uint2 mul2_1 = metal::uint2(2u) * 1u; metal::uint2 mul3_1 = 2u * metal::uint2(1u); metal::float2 mul4_1 = metal::float2(2.0) * 1.0; @@ -137,6 +138,7 @@ void arithmetic( metal::float3 mul_vector0_ = metal::float4x3 {} * metal::float4(1.0); metal::float4 mul_vector1_ = metal::float3(2.0) * metal::float4x3 {}; metal::float3x3 
mul = metal::float4x3 {} * metal::float3x4 {}; + return; } void bit( @@ -165,6 +167,7 @@ void bit( uint shr1_ = 2u >> 1u; metal::int2 shr2_ = metal::int2(2) >> metal::uint2(1u); metal::uint3 shr3_ = metal::uint3(2u) >> metal::uint3(1u); + return; } void comparison( @@ -205,6 +208,7 @@ void comparison( metal::bool2 gte3_ = metal::int2(2) >= metal::int2(1); metal::bool3 gte4_ = metal::uint3(2u) >= metal::uint3(1u); metal::bool4 gte5_ = metal::float4(2.0) >= metal::float4(1.0); + return; } void assignment( @@ -213,12 +217,12 @@ void assignment( metal::int3 vec0_ = metal::int3 {}; a_1 = 1; int _e5 = a_1; - a_1 = _e5 + 1; + a_1 = as_type(as_type(_e5) + as_type(1)); int _e7 = a_1; - a_1 = _e7 - 1; + a_1 = as_type(as_type(_e7) - as_type(1)); int _e9 = a_1; int _e10 = a_1; - a_1 = _e10 * _e9; + a_1 = as_type(as_type(_e10) * as_type(_e9)); int _e12 = a_1; int _e13 = a_1; a_1 = _e13 / _e12; @@ -235,13 +239,13 @@ void assignment( int _e25 = a_1; a_1 = _e25 >> 1u; int _e28 = a_1; - a_1 = _e28 + 1; + a_1 = as_type(as_type(_e28) + as_type(1)); int _e31 = a_1; - a_1 = _e31 - 1; + a_1 = as_type(as_type(_e31) - as_type(1)); int _e37 = vec0_[1]; - vec0_[1] = _e37 + 1; + vec0_[1] = as_type(as_type(_e37) + as_type(1)); int _e41 = vec0_[1]; - vec0_[1] = _e41 - 1; + vec0_[1] = as_type(as_type(_e41) - as_type(1)); return; } @@ -255,6 +259,7 @@ void negation_avoids_prefix_decrement( int p5_ = -(-(-(-(1)))); int p6_ = -(-(-(-(-(1))))); int p7_ = -(-(-(-(-(1))))); + return; } struct main_Input { diff --git a/naga/tests/out/msl/overrides-ray-query.msl b/naga/tests/out/msl/overrides-ray-query.msl index 3aa0ee0359..d70011159b 100644 --- a/naga/tests/out/msl/overrides-ray-query.msl +++ b/naga/tests/out/msl/overrides-ray-query.msl @@ -33,15 +33,16 @@ kernel void main_( rq.intersector.force_opacity((desc.flags & 1) != 0 ? metal::raytracing::forced_opacity::opaque : (desc.flags & 2) != 0 ? metal::raytracing::forced_opacity::non_opaque : metal::raytracing::forced_opacity::none); rq.intersector.accept_any_intersection((desc.flags & 4) != 0); rq.intersection = rq.intersector.intersect(metal::raytracing::ray(desc.origin, desc.dir, desc.tmin, desc.tmax), acc_struct, desc.cull_mask); rq.ready = true; + uint2 loop_bound = uint2(0u); while(true) { + if (metal::all(loop_bound == uint2(4294967295u))) { break; } + loop_bound += uint2(loop_bound.y == 4294967295u, 1u); bool _e31 = rq.ready; rq.ready = false; if (_e31) { } else { break; } -#define LOOP_IS_BOUNDED { volatile bool unpredictable_break_from_loop = false; if (unpredictable_break_from_loop) break; } - LOOP_IS_BOUNDED } return; } diff --git a/naga/tests/out/msl/phony_assignment.msl b/naga/tests/out/msl/phony_assignment.msl index daad571906..295bd7c0cc 100644 --- a/naga/tests/out/msl/phony_assignment.msl +++ b/naga/tests/out/msl/phony_assignment.msl @@ -21,4 +21,5 @@ kernel void main_( int _e6 = five(); int _e7 = five(); float phony_2 = binding; + return; } diff --git a/naga/tests/out/msl/ray-query.msl b/naga/tests/out/msl/ray-query.msl index b8230fb2e8..37e63a6cd4 100644 --- a/naga/tests/out/msl/ray-query.msl +++ b/naga/tests/out/msl/ray-query.msl @@ -16,8 +16,8 @@ constexpr metal::uint _map_intersection_type(const metal::raytracing::intersecti struct RayIntersection { uint kind; float t; - uint instance_custom_index; - uint instance_id; + uint instance_custom_data; + uint instance_index; uint sbt_record_offset; uint geometry_index; uint primitive_index; @@ -53,15 +53,16 @@ RayIntersection query_loop( rq_1.intersector.force_opacity((_e8.flags & 1) != 0 ? 
metal::raytracing::forced_opacity::opaque : (_e8.flags & 2) != 0 ? metal::raytracing::forced_opacity::non_opaque : metal::raytracing::forced_opacity::none); rq_1.intersector.accept_any_intersection((_e8.flags & 4) != 0); rq_1.intersection = rq_1.intersector.intersect(metal::raytracing::ray(_e8.origin, _e8.dir, _e8.tmin, _e8.tmax), acs, _e8.cull_mask); rq_1.ready = true; + uint2 loop_bound = uint2(0u); while(true) { + if (metal::all(loop_bound == uint2(4294967295u))) { break; } + loop_bound += uint2(loop_bound.y == 4294967295u, 1u); bool _e9 = rq_1.ready; rq_1.ready = false; if (_e9) { } else { break; } -#define LOOP_IS_BOUNDED { volatile bool unpredictable_break_from_loop = false; if (unpredictable_break_from_loop) break; } - LOOP_IS_BOUNDED } return RayIntersection {_map_intersection_type(rq_1.intersection.type), rq_1.intersection.distance, rq_1.intersection.user_instance_id, rq_1.intersection.instance_id, {}, rq_1.intersection.geometry_id, rq_1.intersection.primitive_id, rq_1.intersection.triangle_barycentric_coord, rq_1.intersection.triangle_front_facing, {}, rq_1.intersection.object_to_world_transform, rq_1.intersection.world_to_object_transform}; } diff --git a/naga/tests/out/msl/shadow.msl b/naga/tests/out/msl/shadow.msl index 18cc842110..c8ad03b3a5 100644 --- a/naga/tests/out/msl/shadow.msl +++ b/naga/tests/out/msl/shadow.msl @@ -100,8 +100,11 @@ fragment fs_mainOutput fs_main( metal::float3 color = c_ambient; uint i = 0u; metal::float3 normal_1 = metal::normalize(in.world_normal); + uint2 loop_bound = uint2(0u); bool loop_init = true; while(true) { + if (metal::all(loop_bound == uint2(4294967295u))) { break; } + loop_bound += uint2(loop_bound.y == 4294967295u, 1u); if (!loop_init) { uint _e40 = i; i = _e40 + 1u; @@ -123,8 +126,6 @@ fragment fs_mainOutput fs_main( metal::float3 _e37 = color; color = _e37 + ((_e23 * diffuse) * light.color.xyz); } -#define LOOP_IS_BOUNDED { volatile bool unpredictable_break_from_loop = false; if (unpredictable_break_from_loop) break; } - LOOP_IS_BOUNDED } metal::float3 _e42 = color; metal::float4 _e47 = u_entity.color; @@ -152,8 +153,11 @@ fragment fs_main_without_storageOutput fs_main_without_storage( metal::float3 color_1 = c_ambient; uint i_1 = 0u; metal::float3 normal_2 = metal::normalize(in_1.world_normal); + uint2 loop_bound_1 = uint2(0u); bool loop_init_1 = true; while(true) { + if (metal::all(loop_bound_1 == uint2(4294967295u))) { break; } + loop_bound_1 += uint2(loop_bound_1.y == 4294967295u, 1u); if (!loop_init_1) { uint _e40 = i_1; i_1 = _e40 + 1u; @@ -175,7 +179,6 @@ fragment fs_main_without_storageOutput fs_main_without_storage( metal::float3 _e37 = color_1; color_1 = _e37 + ((_e23 * diffuse_1) * light_1.color.xyz); } - LOOP_IS_BOUNDED } metal::float3 _e42 = color_1; metal::float4 _e47 = u_entity.color; diff --git a/naga/tests/out/msl/storage-textures.msl b/naga/tests/out/msl/storage-textures.msl new file mode 100644 index 0000000000..c49f8a3575 --- /dev/null +++ b/naga/tests/out/msl/storage-textures.msl @@ -0,0 +1,29 @@ +// language: metal1.0 +#include +#include + +using metal::uint; + + +kernel void csLoad( + metal::texture2d s_r_r [[user(fake0)]] +, metal::texture2d s_rg_r [[user(fake0)]] +, metal::texture2d s_rgba_r [[user(fake0)]] +) { + metal::float4 phony = s_r_r.read(metal::uint2(metal::uint2(0u))); + metal::float4 phony_1 = s_rg_r.read(metal::uint2(metal::uint2(0u))); + metal::float4 phony_2 = s_rgba_r.read(metal::uint2(metal::uint2(0u))); + return; +} + + +kernel void csStore( + metal::texture2d s_r_w [[user(fake0)]] +, 
metal::texture2d s_rg_w [[user(fake0)]] +, metal::texture2d s_rgba_w [[user(fake0)]] +) { + s_r_w.write(metal::float4(0.0), metal::uint2(metal::uint2(0u))); + s_rg_w.write(metal::float4(0.0), metal::uint2(metal::uint2(0u))); + s_rgba_w.write(metal::float4(0.0), metal::uint2(metal::uint2(0u))); + return; +} diff --git a/naga/tests/out/spv/abstract-types-return.spvasm b/naga/tests/out/spv/abstract-types-return.spvasm new file mode 100644 index 0000000000..8ce671adfc --- /dev/null +++ b/naga/tests/out/spv/abstract-types-return.spvasm @@ -0,0 +1,70 @@ +; SPIR-V +; Version: 1.1 +; Generator: rspirv +; Bound: 41 +OpCapability Shader +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint GLCompute %38 "main" +OpExecutionMode %38 LocalSize 1 1 1 +OpDecorate %7 ArrayStride 4 +%2 = OpTypeVoid +%3 = OpTypeInt 32 1 +%4 = OpTypeInt 32 0 +%5 = OpTypeFloat 32 +%6 = OpTypeVector %5 2 +%8 = OpConstant %4 4 +%7 = OpTypeArray %5 %8 +%11 = OpTypeFunction %3 +%12 = OpConstant %3 1 +%16 = OpTypeFunction %4 +%17 = OpConstant %4 1 +%21 = OpTypeFunction %5 +%22 = OpConstant %5 1.0 +%29 = OpTypeFunction %6 +%30 = OpConstantComposite %6 %22 %22 +%34 = OpTypeFunction %7 +%35 = OpConstantComposite %7 %22 %22 %22 %22 +%39 = OpTypeFunction %2 +%10 = OpFunction %3 None %11 +%9 = OpLabel +OpBranch %13 +%13 = OpLabel +OpReturnValue %12 +OpFunctionEnd +%15 = OpFunction %4 None %16 +%14 = OpLabel +OpBranch %18 +%18 = OpLabel +OpReturnValue %17 +OpFunctionEnd +%20 = OpFunction %5 None %21 +%19 = OpLabel +OpBranch %23 +%23 = OpLabel +OpReturnValue %22 +OpFunctionEnd +%25 = OpFunction %5 None %21 +%24 = OpLabel +OpBranch %26 +%26 = OpLabel +OpReturnValue %22 +OpFunctionEnd +%28 = OpFunction %6 None %29 +%27 = OpLabel +OpBranch %31 +%31 = OpLabel +OpReturnValue %30 +OpFunctionEnd +%33 = OpFunction %7 None %34 +%32 = OpLabel +OpBranch %36 +%36 = OpLabel +OpReturnValue %35 +OpFunctionEnd +%38 = OpFunction %2 None %39 +%37 = OpLabel +OpBranch %40 +%40 = OpLabel +OpReturn +OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/access.spvasm b/naga/tests/out/spv/access.spvasm index b2f87bdcee..1e446bd9c5 100644 --- a/naga/tests/out/spv/access.spvasm +++ b/naga/tests/out/spv/access.spvasm @@ -1,18 +1,18 @@ ; SPIR-V ; Version: 1.1 ; Generator: rspirv -; Bound: 342 +; Bound: 402 OpCapability Shader OpExtension "SPV_KHR_storage_buffer_storage_class" %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 -OpEntryPoint Vertex %250 "foo_vert" %245 %248 -OpEntryPoint Fragment %303 "foo_frag" %302 -OpEntryPoint GLCompute %321 "assign_through_ptr" -OpEntryPoint GLCompute %332 "assign_to_ptr_components" -OpExecutionMode %303 OriginUpperLeft -OpExecutionMode %321 LocalSize 1 1 1 -OpExecutionMode %332 LocalSize 1 1 1 +OpEntryPoint Vertex %311 "foo_vert" %306 %309 +OpEntryPoint Fragment %363 "foo_frag" %362 +OpEntryPoint GLCompute %381 "assign_through_ptr" +OpEntryPoint GLCompute %392 "assign_to_ptr_components" +OpExecutionMode %363 OriginUpperLeft +OpExecutionMode %381 LocalSize 1 1 1 +OpExecutionMode %392 LocalSize 1 1 1 OpMemberName %6 0 "a" OpMemberName %6 1 "b" OpMemberName %6 2 "c" @@ -32,44 +32,61 @@ OpMemberName %26 0 "am" OpName %26 "MatCx2InArray" OpMemberName %36 0 "x" OpName %36 "AssignToMember" -OpName %45 "global_const" -OpName %47 "bar" -OpName %49 "baz" -OpName %52 "qux" -OpName %55 "nested_mat_cx2" -OpName %59 "test_matrix_within_struct_accesses" -OpName %87 "idx" -OpName %89 "t" -OpName %135 "test_matrix_within_array_within_struct_accesses" -OpName %145 "idx" -OpName %146 
"t" -OpName %192 "foo" -OpName %193 "read_from_private" -OpName %198 "a" -OpName %199 "test_arr_as_arg" -OpName %205 "p" -OpName %206 "assign_through_ptr_fn" -OpName %211 "foo" -OpName %212 "assign_array_through_ptr_fn" -OpName %219 "p" -OpName %220 "fetch_arg_ptr_member" -OpName %226 "p" -OpName %227 "assign_to_arg_ptr_member" -OpName %232 "p" -OpName %233 "fetch_arg_ptr_array_element" -OpName %239 "p" -OpName %240 "assign_to_arg_ptr_array_element" -OpName %245 "vi" -OpName %250 "foo_vert" -OpName %262 "foo" -OpName %263 "c2" -OpName %303 "foo_frag" -OpName %321 "assign_through_ptr" -OpName %326 "val" -OpName %327 "arr" -OpName %332 "assign_to_ptr_components" -OpName %333 "s1" -OpName %335 "a1" +OpMemberName %44 0 "m" +OpName %44 "S" +OpMemberName %45 0 "delicious" +OpName %45 "Inner" +OpMemberName %46 0 "om_nom_nom" +OpMemberName %46 1 "thing" +OpName %46 "Outer" +OpName %51 "global_const" +OpName %53 "bar" +OpName %55 "baz" +OpName %58 "qux" +OpName %61 "nested_mat_cx2" +OpName %65 "test_matrix_within_struct_accesses" +OpName %93 "idx" +OpName %95 "t" +OpName %140 "test_matrix_within_array_within_struct_accesses" +OpName %150 "idx" +OpName %151 "t" +OpName %197 "foo" +OpName %198 "read_from_private" +OpName %203 "a" +OpName %204 "test_arr_as_arg" +OpName %210 "p" +OpName %211 "assign_through_ptr_fn" +OpName %216 "foo" +OpName %217 "assign_array_through_ptr_fn" +OpName %224 "p" +OpName %225 "fetch_arg_ptr_member" +OpName %231 "p" +OpName %232 "assign_to_arg_ptr_member" +OpName %237 "p" +OpName %238 "fetch_arg_ptr_array_element" +OpName %244 "p" +OpName %245 "assign_to_arg_ptr_array_element" +OpName %250 "value" +OpName %251 "index_ptr" +OpName %253 "a" +OpName %262 "member_ptr" +OpName %266 "s" +OpName %272 "let_members_of_members" +OpName %284 "var_members_of_members" +OpName %285 "thing" +OpName %287 "inner" +OpName %290 "delishus" +OpName %306 "vi" +OpName %311 "foo_vert" +OpName %322 "foo" +OpName %323 "c2" +OpName %363 "foo_frag" +OpName %381 "assign_through_ptr" +OpName %386 "val" +OpName %387 "arr" +OpName %392 "assign_to_ptr_components" +OpName %393 "s1" +OpName %395 "a1" OpMemberDecorate %6 0 Offset 0 OpMemberDecorate %6 1 Offset 16 OpMemberDecorate %6 2 Offset 28 @@ -102,23 +119,28 @@ OpDecorate %32 ArrayStride 4 OpDecorate %34 ArrayStride 16 OpMemberDecorate %36 0 Offset 0 OpDecorate %38 ArrayStride 4 -OpDecorate %47 DescriptorSet 0 -OpDecorate %47 Binding 0 -OpDecorate %49 DescriptorSet 0 -OpDecorate %49 Binding 1 -OpDecorate %50 Block -OpMemberDecorate %50 0 Offset 0 -OpDecorate %52 DescriptorSet 0 -OpDecorate %52 Binding 2 -OpDecorate %53 Block -OpMemberDecorate %53 0 Offset 0 +OpDecorate %42 ArrayStride 1 +OpMemberDecorate %44 0 Offset 0 +OpMemberDecorate %45 0 Offset 0 +OpMemberDecorate %46 0 Offset 0 +OpMemberDecorate %46 1 Offset 4 +OpDecorate %53 DescriptorSet 0 +OpDecorate %53 Binding 0 OpDecorate %55 DescriptorSet 0 -OpDecorate %55 Binding 3 +OpDecorate %55 Binding 1 OpDecorate %56 Block OpMemberDecorate %56 0 Offset 0 -OpDecorate %245 BuiltIn VertexIndex -OpDecorate %248 BuiltIn Position -OpDecorate %302 Location 0 +OpDecorate %58 DescriptorSet 0 +OpDecorate %58 Binding 2 +OpDecorate %59 Block +OpMemberDecorate %59 0 Offset 0 +OpDecorate %61 DescriptorSet 0 +OpDecorate %61 Binding 3 +OpDecorate %62 Block +OpMemberDecorate %62 0 Offset 0 +OpDecorate %306 BuiltIn VertexIndex +OpDecorate %309 BuiltIn Position +OpDecorate %362 Location 0 %2 = OpTypeVoid %3 = OpTypeInt 32 0 %4 = OpTypeVector %3 3 @@ -158,379 +180,458 @@ OpDecorate %302 Location 0 %39 = OpConstant %3 4 
%38 = OpTypeArray %3 %39 %40 = OpTypePointer Function %38 -%41 = OpConstant %3 0 -%42 = OpConstantComposite %4 %41 %41 %41 -%43 = OpConstant %5 0 -%44 = OpConstantComposite %6 %41 %42 %43 -%46 = OpTypePointer Private %6 -%45 = OpVariable %46 Private %44 -%48 = OpTypePointer StorageBuffer %20 -%47 = OpVariable %48 StorageBuffer -%50 = OpTypeStruct %22 -%51 = OpTypePointer Uniform %50 -%49 = OpVariable %51 Uniform -%53 = OpTypeStruct %23 -%54 = OpTypePointer StorageBuffer %53 -%52 = OpVariable %54 StorageBuffer -%56 = OpTypeStruct %26 +%41 = OpTypeBool +%43 = OpConstant %3 1 +%42 = OpTypeArray %41 %43 +%44 = OpTypeStruct %5 +%45 = OpTypeStruct %5 +%46 = OpTypeStruct %45 %3 +%47 = OpConstant %3 0 +%48 = OpConstantComposite %4 %47 %47 %47 +%49 = OpConstant %5 0 +%50 = OpConstantComposite %6 %47 %48 %49 +%52 = OpTypePointer Private %6 +%51 = OpVariable %52 Private %50 +%54 = OpTypePointer StorageBuffer %20 +%53 = OpVariable %54 StorageBuffer +%56 = OpTypeStruct %22 %57 = OpTypePointer Uniform %56 %55 = OpVariable %57 Uniform -%60 = OpTypeFunction %2 -%61 = OpTypePointer Uniform %22 -%63 = OpConstant %5 1 -%64 = OpConstant %8 1.0 -%65 = OpConstantComposite %12 %64 %64 -%66 = OpConstant %8 2.0 -%67 = OpConstantComposite %12 %66 %66 -%68 = OpConstant %8 3.0 -%69 = OpConstantComposite %12 %68 %68 -%70 = OpConstantComposite %21 %65 %67 %69 -%71 = OpConstantComposite %22 %70 -%72 = OpConstant %8 6.0 +%59 = OpTypeStruct %23 +%60 = OpTypePointer StorageBuffer %59 +%58 = OpVariable %60 StorageBuffer +%62 = OpTypeStruct %26 +%63 = OpTypePointer Uniform %62 +%61 = OpVariable %63 Uniform +%66 = OpTypeFunction %2 +%67 = OpTypePointer Uniform %22 +%69 = OpConstant %5 1 +%70 = OpConstant %8 1.0 +%71 = OpConstantComposite %12 %70 %70 +%72 = OpConstant %8 2.0 %73 = OpConstantComposite %12 %72 %72 -%74 = OpConstant %8 5.0 +%74 = OpConstant %8 3.0 %75 = OpConstantComposite %12 %74 %74 -%76 = OpConstant %8 4.0 -%77 = OpConstantComposite %12 %76 %76 -%78 = OpConstantComposite %21 %73 %75 %77 -%79 = OpConstant %8 9.0 -%80 = OpConstantComposite %12 %79 %79 -%81 = OpConstant %8 90.0 -%82 = OpConstantComposite %12 %81 %81 -%83 = OpConstant %8 10.0 -%84 = OpConstant %8 20.0 -%85 = OpConstant %8 30.0 -%86 = OpConstant %8 40.0 -%88 = OpTypePointer Function %5 -%90 = OpTypePointer Function %22 -%94 = OpTypePointer Uniform %21 -%97 = OpTypePointer Uniform %12 -%103 = OpTypePointer Uniform %8 -%104 = OpConstant %3 1 -%119 = OpTypePointer Function %21 -%121 = OpTypePointer Function %12 -%125 = OpTypePointer Function %8 -%136 = OpTypePointer Uniform %26 -%138 = OpConstantNull %25 -%139 = OpConstantComposite %26 %138 -%140 = OpConstant %8 8.0 -%141 = OpConstantComposite %12 %140 %140 -%142 = OpConstant %8 7.0 -%143 = OpConstantComposite %12 %142 %142 -%144 = OpConstantComposite %24 %141 %143 %73 %75 -%147 = OpTypePointer Function %26 -%151 = OpTypePointer Uniform %25 -%154 = OpTypePointer Uniform %24 -%176 = OpTypePointer Function %25 -%178 = OpTypePointer Function %24 -%194 = OpTypeFunction %8 %27 -%200 = OpTypeFunction %8 %29 -%207 = OpTypeFunction %2 %33 -%208 = OpConstant %3 42 -%213 = OpTypeFunction %2 %35 -%214 = OpConstantComposite %31 %64 %64 %64 %64 -%215 = OpConstantComposite %31 %66 %66 %66 %66 -%216 = OpConstantComposite %34 %214 %215 -%221 = OpTypeFunction %3 %37 -%228 = OpTypeFunction %2 %37 -%234 = OpTypeFunction %3 %40 -%241 = OpTypeFunction %2 %40 -%246 = OpTypePointer Input %3 -%245 = OpVariable %246 Input -%249 = OpTypePointer Output %31 -%248 = OpVariable %249 Output -%252 = OpTypePointer StorageBuffer %23 
-%255 = OpConstant %8 0.0 -%256 = OpConstant %3 3 -%257 = OpConstant %5 3 -%258 = OpConstant %5 4 -%259 = OpConstant %5 5 -%260 = OpConstant %5 42 -%261 = OpConstantNull %29 -%264 = OpTypePointer Function %32 -%265 = OpConstantNull %32 -%270 = OpTypePointer StorageBuffer %9 -%273 = OpTypePointer StorageBuffer %18 -%276 = OpTypePointer StorageBuffer %10 -%277 = OpTypePointer StorageBuffer %8 -%280 = OpTypePointer StorageBuffer %19 -%283 = OpTypePointer StorageBuffer %7 -%284 = OpTypePointer StorageBuffer %5 -%296 = OpTypeVector %5 4 -%302 = OpVariable %249 Output -%305 = OpConstantComposite %10 %255 %255 %255 -%306 = OpConstantComposite %10 %64 %64 %64 -%307 = OpConstantComposite %10 %66 %66 %66 -%308 = OpConstantComposite %10 %68 %68 %68 -%309 = OpConstantComposite %9 %305 %306 %307 %308 -%310 = OpConstantComposite %17 %41 %41 -%311 = OpConstantComposite %17 %104 %104 -%312 = OpConstantComposite %18 %310 %311 -%313 = OpConstantNull %23 -%314 = OpConstantComposite %31 %255 %255 %255 %255 -%322 = OpConstant %3 33 -%323 = OpConstantComposite %31 %72 %72 %72 %72 -%324 = OpConstantComposite %31 %142 %142 %142 %142 -%325 = OpConstantComposite %34 %323 %324 -%334 = OpConstantNull %36 -%336 = OpConstantNull %38 -%59 = OpFunction %2 None %60 -%58 = OpLabel -%87 = OpVariable %88 Function %63 -%89 = OpVariable %90 Function %71 -%62 = OpAccessChain %61 %49 %41 -OpBranch %91 -%91 = OpLabel -%92 = OpLoad %5 %87 -%93 = OpISub %5 %92 %63 -OpStore %87 %93 -%95 = OpAccessChain %94 %62 %41 -%96 = OpLoad %21 %95 -%98 = OpAccessChain %97 %62 %41 %41 -%99 = OpLoad %12 %98 -%100 = OpLoad %5 %87 -%101 = OpAccessChain %97 %62 %41 %100 -%102 = OpLoad %12 %101 -%105 = OpAccessChain %103 %62 %41 %41 %104 -%106 = OpLoad %8 %105 -%107 = OpLoad %5 %87 -%108 = OpAccessChain %103 %62 %41 %41 %107 -%109 = OpLoad %8 %108 -%110 = OpLoad %5 %87 -%111 = OpAccessChain %103 %62 %41 %110 %104 -%112 = OpLoad %8 %111 -%113 = OpLoad %5 %87 -%114 = OpLoad %5 %87 -%115 = OpAccessChain %103 %62 %41 %113 %114 -%116 = OpLoad %8 %115 -%117 = OpLoad %5 %87 -%118 = OpIAdd %5 %117 %63 -OpStore %87 %118 -%120 = OpAccessChain %119 %89 %41 -OpStore %120 %78 -%122 = OpAccessChain %121 %89 %41 %41 -OpStore %122 %80 -%123 = OpLoad %5 %87 -%124 = OpAccessChain %121 %89 %41 %123 -OpStore %124 %82 -%126 = OpAccessChain %125 %89 %41 %41 %104 -OpStore %126 %83 -%127 = OpLoad %5 %87 -%128 = OpAccessChain %125 %89 %41 %41 %127 -OpStore %128 %84 -%129 = OpLoad %5 %87 -%130 = OpAccessChain %125 %89 %41 %129 %104 -OpStore %130 %85 -%131 = OpLoad %5 %87 -%132 = OpLoad %5 %87 -%133 = OpAccessChain %125 %89 %41 %131 %132 -OpStore %133 %86 +%76 = OpConstantComposite %21 %71 %73 %75 +%77 = OpConstantComposite %22 %76 +%78 = OpConstant %8 6.0 +%79 = OpConstantComposite %12 %78 %78 +%80 = OpConstant %8 5.0 +%81 = OpConstantComposite %12 %80 %80 +%82 = OpConstant %8 4.0 +%83 = OpConstantComposite %12 %82 %82 +%84 = OpConstantComposite %21 %79 %81 %83 +%85 = OpConstant %8 9.0 +%86 = OpConstantComposite %12 %85 %85 +%87 = OpConstant %8 90.0 +%88 = OpConstantComposite %12 %87 %87 +%89 = OpConstant %8 10.0 +%90 = OpConstant %8 20.0 +%91 = OpConstant %8 30.0 +%92 = OpConstant %8 40.0 +%94 = OpTypePointer Function %5 +%96 = OpTypePointer Function %22 +%100 = OpTypePointer Uniform %21 +%103 = OpTypePointer Uniform %12 +%109 = OpTypePointer Uniform %8 +%124 = OpTypePointer Function %21 +%126 = OpTypePointer Function %12 +%130 = OpTypePointer Function %8 +%141 = OpTypePointer Uniform %26 +%143 = OpConstantNull %25 +%144 = OpConstantComposite %26 %143 +%145 = OpConstant %8 
8.0 +%146 = OpConstantComposite %12 %145 %145 +%147 = OpConstant %8 7.0 +%148 = OpConstantComposite %12 %147 %147 +%149 = OpConstantComposite %24 %146 %148 %79 %81 +%152 = OpTypePointer Function %26 +%156 = OpTypePointer Uniform %25 +%159 = OpTypePointer Uniform %24 +%181 = OpTypePointer Function %25 +%183 = OpTypePointer Function %24 +%199 = OpTypeFunction %8 %27 +%205 = OpTypeFunction %8 %29 +%212 = OpTypeFunction %2 %33 +%213 = OpConstant %3 42 +%218 = OpTypeFunction %2 %35 +%219 = OpConstantComposite %31 %70 %70 %70 %70 +%220 = OpConstantComposite %31 %72 %72 %72 %72 +%221 = OpConstantComposite %34 %219 %220 +%226 = OpTypeFunction %3 %37 +%233 = OpTypeFunction %2 %37 +%239 = OpTypeFunction %3 %40 +%246 = OpTypeFunction %2 %40 +%252 = OpTypeFunction %41 %41 +%254 = OpTypePointer Function %42 +%255 = OpConstantNull %42 +%258 = OpTypePointer Function %41 +%263 = OpTypeFunction %5 +%264 = OpConstant %5 42 +%265 = OpConstantComposite %44 %264 +%267 = OpTypePointer Function %44 +%273 = OpConstantNull %46 +%286 = OpTypePointer Function %46 +%288 = OpTypePointer Function %45 +%289 = OpConstantNull %45 +%291 = OpConstantNull %5 +%307 = OpTypePointer Input %3 +%306 = OpVariable %307 Input +%310 = OpTypePointer Output %31 +%309 = OpVariable %310 Output +%313 = OpTypePointer StorageBuffer %23 +%316 = OpConstant %8 0.0 +%317 = OpConstant %3 3 +%318 = OpConstant %5 3 +%319 = OpConstant %5 4 +%320 = OpConstant %5 5 +%321 = OpConstantNull %29 +%324 = OpTypePointer Function %32 +%325 = OpConstantNull %32 +%330 = OpTypePointer StorageBuffer %9 +%333 = OpTypePointer StorageBuffer %18 +%336 = OpTypePointer StorageBuffer %10 +%337 = OpTypePointer StorageBuffer %8 +%340 = OpTypePointer StorageBuffer %19 +%343 = OpTypePointer StorageBuffer %7 +%344 = OpTypePointer StorageBuffer %5 +%356 = OpTypeVector %5 4 +%362 = OpVariable %310 Output +%365 = OpConstantComposite %10 %316 %316 %316 +%366 = OpConstantComposite %10 %70 %70 %70 +%367 = OpConstantComposite %10 %72 %72 %72 +%368 = OpConstantComposite %10 %74 %74 %74 +%369 = OpConstantComposite %9 %365 %366 %367 %368 +%370 = OpConstantComposite %17 %47 %47 +%371 = OpConstantComposite %17 %43 %43 +%372 = OpConstantComposite %18 %370 %371 +%373 = OpConstantNull %23 +%374 = OpConstantComposite %31 %316 %316 %316 %316 +%382 = OpConstant %3 33 +%383 = OpConstantComposite %31 %78 %78 %78 %78 +%384 = OpConstantComposite %31 %147 %147 %147 %147 +%385 = OpConstantComposite %34 %383 %384 +%394 = OpConstantNull %36 +%396 = OpConstantNull %38 +%65 = OpFunction %2 None %66 +%64 = OpLabel +%93 = OpVariable %94 Function %69 +%95 = OpVariable %96 Function %77 +%68 = OpAccessChain %67 %55 %47 +OpBranch %97 +%97 = OpLabel +%98 = OpLoad %5 %93 +%99 = OpISub %5 %98 %69 +OpStore %93 %99 +%101 = OpAccessChain %100 %68 %47 +%102 = OpLoad %21 %101 +%104 = OpAccessChain %103 %68 %47 %47 +%105 = OpLoad %12 %104 +%106 = OpLoad %5 %93 +%107 = OpAccessChain %103 %68 %47 %106 +%108 = OpLoad %12 %107 +%110 = OpAccessChain %109 %68 %47 %47 %43 +%111 = OpLoad %8 %110 +%112 = OpLoad %5 %93 +%113 = OpAccessChain %109 %68 %47 %47 %112 +%114 = OpLoad %8 %113 +%115 = OpLoad %5 %93 +%116 = OpAccessChain %109 %68 %47 %115 %43 +%117 = OpLoad %8 %116 +%118 = OpLoad %5 %93 +%119 = OpLoad %5 %93 +%120 = OpAccessChain %109 %68 %47 %118 %119 +%121 = OpLoad %8 %120 +%122 = OpLoad %5 %93 +%123 = OpIAdd %5 %122 %69 +OpStore %93 %123 +%125 = OpAccessChain %124 %95 %47 +OpStore %125 %84 +%127 = OpAccessChain %126 %95 %47 %47 +OpStore %127 %86 +%128 = OpLoad %5 %93 +%129 = OpAccessChain %126 %95 %47 %128 +OpStore 
%129 %88 +%131 = OpAccessChain %130 %95 %47 %47 %43 +OpStore %131 %89 +%132 = OpLoad %5 %93 +%133 = OpAccessChain %130 %95 %47 %47 %132 +OpStore %133 %90 +%134 = OpLoad %5 %93 +%135 = OpAccessChain %130 %95 %47 %134 %43 +OpStore %135 %91 +%136 = OpLoad %5 %93 +%137 = OpLoad %5 %93 +%138 = OpAccessChain %130 %95 %47 %136 %137 +OpStore %138 %92 OpReturn OpFunctionEnd -%135 = OpFunction %2 None %60 -%134 = OpLabel -%145 = OpVariable %88 Function %63 -%146 = OpVariable %147 Function %139 -%137 = OpAccessChain %136 %55 %41 -OpBranch %148 -%148 = OpLabel -%149 = OpLoad %5 %145 -%150 = OpISub %5 %149 %63 -OpStore %145 %150 -%152 = OpAccessChain %151 %137 %41 -%153 = OpLoad %25 %152 -%155 = OpAccessChain %154 %137 %41 %41 -%156 = OpLoad %24 %155 -%157 = OpAccessChain %97 %137 %41 %41 %41 -%158 = OpLoad %12 %157 -%159 = OpLoad %5 %145 -%160 = OpAccessChain %97 %137 %41 %41 %159 -%161 = OpLoad %12 %160 -%162 = OpAccessChain %103 %137 %41 %41 %41 %104 -%163 = OpLoad %8 %162 -%164 = OpLoad %5 %145 -%165 = OpAccessChain %103 %137 %41 %41 %41 %164 -%166 = OpLoad %8 %165 -%167 = OpLoad %5 %145 -%168 = OpAccessChain %103 %137 %41 %41 %167 %104 -%169 = OpLoad %8 %168 -%170 = OpLoad %5 %145 -%171 = OpLoad %5 %145 -%172 = OpAccessChain %103 %137 %41 %41 %170 %171 -%173 = OpLoad %8 %172 -%174 = OpLoad %5 %145 -%175 = OpIAdd %5 %174 %63 -OpStore %145 %175 -%177 = OpAccessChain %176 %146 %41 -OpStore %177 %138 -%179 = OpAccessChain %178 %146 %41 %41 -OpStore %179 %144 -%180 = OpAccessChain %121 %146 %41 %41 %41 -OpStore %180 %80 -%181 = OpLoad %5 %145 -%182 = OpAccessChain %121 %146 %41 %41 %181 -OpStore %182 %82 -%183 = OpAccessChain %125 %146 %41 %41 %41 %104 -OpStore %183 %83 -%184 = OpLoad %5 %145 -%185 = OpAccessChain %125 %146 %41 %41 %41 %184 -OpStore %185 %84 -%186 = OpLoad %5 %145 -%187 = OpAccessChain %125 %146 %41 %41 %186 %104 -OpStore %187 %85 -%188 = OpLoad %5 %145 -%189 = OpLoad %5 %145 -%190 = OpAccessChain %125 %146 %41 %41 %188 %189 -OpStore %190 %86 +%140 = OpFunction %2 None %66 +%139 = OpLabel +%150 = OpVariable %94 Function %69 +%151 = OpVariable %152 Function %144 +%142 = OpAccessChain %141 %61 %47 +OpBranch %153 +%153 = OpLabel +%154 = OpLoad %5 %150 +%155 = OpISub %5 %154 %69 +OpStore %150 %155 +%157 = OpAccessChain %156 %142 %47 +%158 = OpLoad %25 %157 +%160 = OpAccessChain %159 %142 %47 %47 +%161 = OpLoad %24 %160 +%162 = OpAccessChain %103 %142 %47 %47 %47 +%163 = OpLoad %12 %162 +%164 = OpLoad %5 %150 +%165 = OpAccessChain %103 %142 %47 %47 %164 +%166 = OpLoad %12 %165 +%167 = OpAccessChain %109 %142 %47 %47 %47 %43 +%168 = OpLoad %8 %167 +%169 = OpLoad %5 %150 +%170 = OpAccessChain %109 %142 %47 %47 %47 %169 +%171 = OpLoad %8 %170 +%172 = OpLoad %5 %150 +%173 = OpAccessChain %109 %142 %47 %47 %172 %43 +%174 = OpLoad %8 %173 +%175 = OpLoad %5 %150 +%176 = OpLoad %5 %150 +%177 = OpAccessChain %109 %142 %47 %47 %175 %176 +%178 = OpLoad %8 %177 +%179 = OpLoad %5 %150 +%180 = OpIAdd %5 %179 %69 +OpStore %150 %180 +%182 = OpAccessChain %181 %151 %47 +OpStore %182 %143 +%184 = OpAccessChain %183 %151 %47 %47 +OpStore %184 %149 +%185 = OpAccessChain %126 %151 %47 %47 %47 +OpStore %185 %86 +%186 = OpLoad %5 %150 +%187 = OpAccessChain %126 %151 %47 %47 %186 +OpStore %187 %88 +%188 = OpAccessChain %130 %151 %47 %47 %47 %43 +OpStore %188 %89 +%189 = OpLoad %5 %150 +%190 = OpAccessChain %130 %151 %47 %47 %47 %189 +OpStore %190 %90 +%191 = OpLoad %5 %150 +%192 = OpAccessChain %130 %151 %47 %47 %191 %43 +OpStore %192 %91 +%193 = OpLoad %5 %150 +%194 = OpLoad %5 %150 +%195 = OpAccessChain %130 
%151 %47 %47 %193 %194 +OpStore %195 %92 OpReturn OpFunctionEnd -%193 = OpFunction %8 None %194 -%192 = OpFunctionParameter %27 -%191 = OpLabel -OpBranch %195 -%195 = OpLabel -%196 = OpLoad %8 %192 -OpReturnValue %196 +%198 = OpFunction %8 None %199 +%197 = OpFunctionParameter %27 +%196 = OpLabel +OpBranch %200 +%200 = OpLabel +%201 = OpLoad %8 %197 +OpReturnValue %201 OpFunctionEnd -%199 = OpFunction %8 None %200 -%198 = OpFunctionParameter %29 -%197 = OpLabel -OpBranch %201 -%201 = OpLabel -%202 = OpCompositeExtract %28 %198 4 -%203 = OpCompositeExtract %8 %202 9 -OpReturnValue %203 +%204 = OpFunction %8 None %205 +%203 = OpFunctionParameter %29 +%202 = OpLabel +OpBranch %206 +%206 = OpLabel +%207 = OpCompositeExtract %28 %203 4 +%208 = OpCompositeExtract %8 %207 9 +OpReturnValue %208 OpFunctionEnd -%206 = OpFunction %2 None %207 -%205 = OpFunctionParameter %33 -%204 = OpLabel -OpBranch %209 +%211 = OpFunction %2 None %212 +%210 = OpFunctionParameter %33 %209 = OpLabel -OpStore %205 %208 +OpBranch %214 +%214 = OpLabel +OpStore %210 %213 OpReturn OpFunctionEnd -%212 = OpFunction %2 None %213 -%211 = OpFunctionParameter %35 -%210 = OpLabel -OpBranch %217 -%217 = OpLabel -OpStore %211 %216 -OpReturn -OpFunctionEnd -%220 = OpFunction %3 None %221 -%219 = OpFunctionParameter %37 -%218 = OpLabel +%217 = OpFunction %2 None %218 +%216 = OpFunctionParameter %35 +%215 = OpLabel OpBranch %222 %222 = OpLabel -%223 = OpAccessChain %33 %219 %41 -%224 = OpLoad %3 %223 -OpReturnValue %224 +OpStore %216 %221 +OpReturn +OpFunctionEnd +%225 = OpFunction %3 None %226 +%224 = OpFunctionParameter %37 +%223 = OpLabel +OpBranch %227 +%227 = OpLabel +%228 = OpAccessChain %33 %224 %47 +%229 = OpLoad %3 %228 +OpReturnValue %229 OpFunctionEnd -%227 = OpFunction %2 None %228 -%226 = OpFunctionParameter %37 -%225 = OpLabel -OpBranch %229 -%229 = OpLabel -%230 = OpAccessChain %33 %226 %41 -OpStore %230 %16 +%232 = OpFunction %2 None %233 +%231 = OpFunctionParameter %37 +%230 = OpLabel +OpBranch %234 +%234 = OpLabel +%235 = OpAccessChain %33 %231 %47 +OpStore %235 %16 OpReturn OpFunctionEnd -%233 = OpFunction %3 None %234 -%232 = OpFunctionParameter %40 -%231 = OpLabel -OpBranch %235 -%235 = OpLabel -%236 = OpAccessChain %33 %232 %104 -%237 = OpLoad %3 %236 -OpReturnValue %237 +%238 = OpFunction %3 None %239 +%237 = OpFunctionParameter %40 +%236 = OpLabel +OpBranch %240 +%240 = OpLabel +%241 = OpAccessChain %33 %237 %43 +%242 = OpLoad %3 %241 +OpReturnValue %242 OpFunctionEnd -%240 = OpFunction %2 None %241 -%239 = OpFunctionParameter %40 -%238 = OpLabel -OpBranch %242 -%242 = OpLabel -%243 = OpAccessChain %33 %239 %104 -OpStore %243 %16 +%245 = OpFunction %2 None %246 +%244 = OpFunctionParameter %40 +%243 = OpLabel +OpBranch %247 +%247 = OpLabel +%248 = OpAccessChain %33 %244 %43 +OpStore %248 %16 OpReturn OpFunctionEnd -%250 = OpFunction %2 None %60 -%244 = OpLabel -%262 = OpVariable %27 Function %255 -%263 = OpVariable %264 Function %265 -%247 = OpLoad %3 %245 -%251 = OpAccessChain %61 %49 %41 -%253 = OpAccessChain %252 %52 %41 -%254 = OpAccessChain %136 %55 %41 -OpBranch %266 -%266 = OpLabel -%267 = OpLoad %8 %262 -OpStore %262 %64 -%268 = OpFunctionCall %2 %59 -%269 = OpFunctionCall %2 %135 -%271 = OpAccessChain %270 %47 %41 -%272 = OpLoad %9 %271 -%274 = OpAccessChain %273 %47 %39 -%275 = OpLoad %18 %274 -%278 = OpAccessChain %277 %47 %41 %256 %41 -%279 = OpLoad %8 %278 -%281 = OpArrayLength %3 %47 5 -%282 = OpISub %3 %281 %14 -%285 = OpAccessChain %284 %47 %30 %282 %41 -%286 = OpLoad %5 %285 -%287 = OpLoad %23 
%253 -%288 = OpFunctionCall %8 %193 %262 -%289 = OpConvertFToS %5 %279 -%290 = OpCompositeConstruct %32 %286 %289 %257 %258 %259 -OpStore %263 %290 -%291 = OpIAdd %3 %247 %104 -%292 = OpAccessChain %88 %263 %291 -OpStore %292 %260 -%293 = OpAccessChain %88 %263 %247 -%294 = OpLoad %5 %293 -%295 = OpFunctionCall %8 %199 %261 -%297 = OpCompositeConstruct %296 %294 %294 %294 %294 -%298 = OpConvertSToF %31 %297 -%299 = OpMatrixTimesVector %10 %272 %298 -%300 = OpCompositeConstruct %31 %299 %66 -OpStore %248 %300 +%251 = OpFunction %41 None %252 +%250 = OpFunctionParameter %41 +%249 = OpLabel +%253 = OpVariable %254 Function %255 +OpBranch %256 +%256 = OpLabel +%257 = OpCompositeConstruct %42 %250 +OpStore %253 %257 +%259 = OpAccessChain %258 %253 %47 +%260 = OpLoad %41 %259 +OpReturnValue %260 +OpFunctionEnd +%262 = OpFunction %5 None %263 +%261 = OpLabel +%266 = OpVariable %267 Function %265 +OpBranch %268 +%268 = OpLabel +%269 = OpAccessChain %94 %266 %47 +%270 = OpLoad %5 %269 +OpReturnValue %270 +OpFunctionEnd +%272 = OpFunction %5 None %263 +%271 = OpLabel +OpBranch %274 +%274 = OpLabel +%275 = OpCompositeExtract %45 %273 0 +%276 = OpCompositeExtract %5 %275 0 +%277 = OpCompositeExtract %3 %273 1 +%278 = OpBitcast %3 %276 +%279 = OpINotEqual %41 %277 %278 +OpSelectionMerge %280 None +OpBranchConditional %279 %280 %280 +%280 = OpLabel +%281 = OpCompositeExtract %45 %273 0 +%282 = OpCompositeExtract %5 %281 0 +OpReturnValue %282 +OpFunctionEnd +%284 = OpFunction %5 None %263 +%283 = OpLabel +%285 = OpVariable %286 Function %273 +%287 = OpVariable %288 Function %289 +%290 = OpVariable %94 Function %291 +OpBranch %292 +%292 = OpLabel +%293 = OpAccessChain %288 %285 %47 +%294 = OpLoad %45 %293 +OpStore %287 %294 +%295 = OpAccessChain %94 %287 %47 +%296 = OpLoad %5 %295 +OpStore %290 %296 +%297 = OpAccessChain %33 %285 %43 +%298 = OpLoad %3 %297 +%299 = OpLoad %5 %290 +%300 = OpBitcast %3 %299 +%301 = OpINotEqual %41 %298 %300 +OpSelectionMerge %302 None +OpBranchConditional %301 %302 %302 +%302 = OpLabel +%303 = OpAccessChain %94 %285 %47 %47 +%304 = OpLoad %5 %303 +OpReturnValue %304 +OpFunctionEnd +%311 = OpFunction %2 None %66 +%305 = OpLabel +%322 = OpVariable %27 Function %316 +%323 = OpVariable %324 Function %325 +%308 = OpLoad %3 %306 +%312 = OpAccessChain %67 %55 %47 +%314 = OpAccessChain %313 %58 %47 +%315 = OpAccessChain %141 %61 %47 +OpBranch %326 +%326 = OpLabel +%327 = OpLoad %8 %322 +OpStore %322 %70 +%328 = OpFunctionCall %2 %65 +%329 = OpFunctionCall %2 %140 +%331 = OpAccessChain %330 %53 %47 +%332 = OpLoad %9 %331 +%334 = OpAccessChain %333 %53 %39 +%335 = OpLoad %18 %334 +%338 = OpAccessChain %337 %53 %47 %317 %47 +%339 = OpLoad %8 %338 +%341 = OpArrayLength %3 %53 5 +%342 = OpISub %3 %341 %14 +%345 = OpAccessChain %344 %53 %30 %342 %47 +%346 = OpLoad %5 %345 +%347 = OpLoad %23 %314 +%348 = OpFunctionCall %8 %198 %322 +%349 = OpConvertFToS %5 %339 +%350 = OpCompositeConstruct %32 %346 %349 %318 %319 %320 +OpStore %323 %350 +%351 = OpIAdd %3 %308 %43 +%352 = OpAccessChain %94 %323 %351 +OpStore %352 %264 +%353 = OpAccessChain %94 %323 %308 +%354 = OpLoad %5 %353 +%355 = OpFunctionCall %8 %204 %321 +%357 = OpCompositeConstruct %356 %354 %354 %354 %354 +%358 = OpConvertSToF %31 %357 +%359 = OpMatrixTimesVector %10 %332 %358 +%360 = OpCompositeConstruct %31 %359 %72 +OpStore %309 %360 OpReturn OpFunctionEnd -%303 = OpFunction %2 None %60 -%301 = OpLabel -%304 = OpAccessChain %252 %52 %41 -OpBranch %315 -%315 = OpLabel -%316 = OpAccessChain %277 %47 %41 %104 %14 -OpStore %316 %64 
-%317 = OpAccessChain %270 %47 %41 -OpStore %317 %309 -%318 = OpAccessChain %273 %47 %39 -OpStore %318 %312 -%319 = OpAccessChain %284 %47 %30 %104 %41 -OpStore %319 %63 -OpStore %304 %313 -OpStore %302 %314 +%363 = OpFunction %2 None %66 +%361 = OpLabel +%364 = OpAccessChain %313 %58 %47 +OpBranch %375 +%375 = OpLabel +%376 = OpAccessChain %337 %53 %47 %43 %14 +OpStore %376 %70 +%377 = OpAccessChain %330 %53 %47 +OpStore %377 %369 +%378 = OpAccessChain %333 %53 %39 +OpStore %378 %372 +%379 = OpAccessChain %344 %53 %30 %43 %47 +OpStore %379 %69 +OpStore %364 %373 +OpStore %362 %374 OpReturn OpFunctionEnd -%321 = OpFunction %2 None %60 -%320 = OpLabel -%326 = OpVariable %33 Function %322 -%327 = OpVariable %35 Function %325 -OpBranch %328 -%328 = OpLabel -%329 = OpFunctionCall %2 %206 %326 -%330 = OpFunctionCall %2 %212 %327 +%381 = OpFunction %2 None %66 +%380 = OpLabel +%386 = OpVariable %33 Function %382 +%387 = OpVariable %35 Function %385 +OpBranch %388 +%388 = OpLabel +%389 = OpFunctionCall %2 %211 %386 +%390 = OpFunctionCall %2 %217 %387 OpReturn OpFunctionEnd -%332 = OpFunction %2 None %60 -%331 = OpLabel -%333 = OpVariable %37 Function %334 -%335 = OpVariable %40 Function %336 -OpBranch %337 -%337 = OpLabel -%338 = OpFunctionCall %2 %227 %333 -%339 = OpFunctionCall %3 %220 %333 -%340 = OpFunctionCall %2 %240 %335 -%341 = OpFunctionCall %3 %233 %335 +%392 = OpFunction %2 None %66 +%391 = OpLabel +%393 = OpVariable %37 Function %394 +%395 = OpVariable %40 Function %396 +OpBranch %397 +%397 = OpLabel +%398 = OpFunctionCall %2 %232 %393 +%399 = OpFunctionCall %3 %225 %393 +%400 = OpFunctionCall %2 %245 %395 +%401 = OpFunctionCall %3 %238 %395 OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/array-in-function-return-type.spvasm b/naga/tests/out/spv/array-in-function-return-type.spvasm index 79e94fba8a..146e032f35 100644 --- a/naga/tests/out/spv/array-in-function-return-type.spvasm +++ b/naga/tests/out/spv/array-in-function-return-type.spvasm @@ -1,42 +1,58 @@ ; SPIR-V ; Version: 1.1 ; Generator: rspirv -; Bound: 26 +; Bound: 38 OpCapability Shader %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 -OpEntryPoint Fragment %18 "main" %16 -OpExecutionMode %18 OriginUpperLeft +OpEntryPoint Fragment %28 "main" %26 +OpExecutionMode %28 OriginUpperLeft OpDecorate %4 ArrayStride 4 -OpDecorate %16 Location 0 +OpDecorate %7 ArrayStride 8 +OpDecorate %26 Location 0 %2 = OpTypeVoid %3 = OpTypeFloat 32 %6 = OpTypeInt 32 0 %5 = OpConstant %6 2 %4 = OpTypeArray %3 %5 -%7 = OpTypeVector %3 4 -%10 = OpTypeFunction %4 -%11 = OpConstant %3 1.0 -%12 = OpConstant %3 2.0 -%13 = OpConstantComposite %4 %11 %12 -%17 = OpTypePointer Output %7 -%16 = OpVariable %17 Output -%19 = OpTypeFunction %2 -%20 = OpConstant %3 0.0 -%9 = OpFunction %4 None %10 -%8 = OpLabel -OpBranch %14 -%14 = OpLabel -OpReturnValue %13 +%8 = OpConstant %6 3 +%7 = OpTypeArray %4 %8 +%9 = OpTypeVector %3 4 +%12 = OpTypeFunction %4 +%13 = OpConstant %3 1.0 +%14 = OpConstant %3 2.0 +%15 = OpConstantComposite %4 %13 %14 +%19 = OpTypeFunction %7 +%27 = OpTypePointer Output %9 +%26 = OpVariable %27 Output +%29 = OpTypeFunction %2 +%30 = OpConstant %3 0.0 +%11 = OpFunction %4 None %12 +%10 = OpLabel +OpBranch %16 +%16 = OpLabel +OpReturnValue %15 OpFunctionEnd -%18 = OpFunction %2 None %19 -%15 = OpLabel -OpBranch %21 -%21 = OpLabel -%22 = OpFunctionCall %4 %9 -%23 = OpCompositeExtract %3 %22 0 -%24 = OpCompositeExtract %3 %22 1 -%25 = OpCompositeConstruct %7 %23 %24 %20 %11 -OpStore %16 %25 +%18 = 
OpFunction %7 None %19 +%17 = OpLabel +OpBranch %20 +%20 = OpLabel +%21 = OpFunctionCall %4 %11 +%22 = OpFunctionCall %4 %11 +%23 = OpFunctionCall %4 %11 +%24 = OpCompositeConstruct %7 %21 %22 %23 +OpReturnValue %24 +OpFunctionEnd +%28 = OpFunction %2 None %29 +%25 = OpLabel +OpBranch %31 +%31 = OpLabel +%32 = OpFunctionCall %7 %18 +%33 = OpCompositeExtract %4 %32 0 +%34 = OpCompositeExtract %3 %33 0 +%35 = OpCompositeExtract %4 %32 0 +%36 = OpCompositeExtract %3 %35 1 +%37 = OpCompositeConstruct %9 %34 %36 %30 %13 +OpStore %26 %37 OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/ray-query.spvasm b/naga/tests/out/spv/ray-query.spvasm index 5279bfc2e1..d9a9edc984 100644 --- a/naga/tests/out/spv/ray-query.spvasm +++ b/naga/tests/out/spv/ray-query.spvasm @@ -1,16 +1,16 @@ ; SPIR-V ; Version: 1.4 ; Generator: rspirv -; Bound: 136 +; Bound: 160 OpCapability Shader OpCapability RayQueryKHR OpExtension "SPV_KHR_ray_query" %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 -OpEntryPoint GLCompute %84 "main" %15 %17 -OpEntryPoint GLCompute %105 "main_candidate" %15 %17 -OpExecutionMode %84 LocalSize 1 1 1 -OpExecutionMode %105 LocalSize 1 1 1 +OpEntryPoint GLCompute %123 "main" %15 %17 +OpEntryPoint GLCompute %143 "main_candidate" %15 %17 +OpExecutionMode %123 LocalSize 1 1 1 +OpExecutionMode %143 LocalSize 1 1 1 OpMemberDecorate %10 0 Offset 0 OpMemberDecorate %10 1 Offset 4 OpMemberDecorate %10 2 Offset 8 @@ -64,20 +64,87 @@ OpMemberDecorate %18 0 Offset 0 %29 = OpConstant %3 0.1 %30 = OpConstant %3 100.0 %32 = OpTypePointer Function %11 -%50 = OpConstant %6 1 -%67 = OpTypeFunction %4 %4 %10 -%68 = OpConstant %3 1.0 -%69 = OpConstant %3 2.4 -%70 = OpConstant %3 0.0 -%85 = OpTypeFunction %2 -%87 = OpTypePointer StorageBuffer %13 -%88 = OpConstant %6 0 -%90 = OpConstantComposite %4 %70 %70 %70 -%91 = OpConstantComposite %4 %70 %68 %70 -%94 = OpTypePointer StorageBuffer %6 -%99 = OpTypePointer StorageBuffer %4 -%108 = OpConstantComposite %12 %27 %28 %29 %30 %90 %91 -%109 = OpConstant %6 3 +%50 = OpTypePointer Function %10 +%51 = OpTypePointer Function %6 +%52 = OpTypePointer Function %9 +%53 = OpTypePointer Function %7 +%54 = OpTypePointer Function %8 +%55 = OpTypePointer Function %3 +%56 = OpTypeFunction %10 %32 +%58 = OpConstantNull %10 +%62 = OpConstant %6 1 +%64 = OpConstant %6 0 +%76 = OpConstant %6 2 +%78 = OpConstant %6 3 +%81 = OpConstant %6 5 +%83 = OpConstant %6 6 +%85 = OpConstant %6 9 +%87 = OpConstant %6 10 +%96 = OpConstant %6 7 +%98 = OpConstant %6 8 +%106 = OpTypeFunction %4 %4 %10 +%107 = OpConstant %3 1.0 +%108 = OpConstant %3 2.4 +%109 = OpConstant %3 0.0 +%124 = OpTypeFunction %2 +%126 = OpTypePointer StorageBuffer %13 +%128 = OpConstantComposite %4 %109 %109 %109 +%129 = OpConstantComposite %4 %109 %107 %109 +%132 = OpTypePointer StorageBuffer %6 +%137 = OpTypePointer StorageBuffer %4 +%146 = OpConstantComposite %12 %27 %28 %29 %30 %128 %129 +%57 = OpFunction %10 None %56 +%59 = OpFunctionParameter %32 +%60 = OpLabel +%61 = OpVariable %50 Function %58 +%63 = OpRayQueryGetIntersectionTypeKHR %6 %59 %62 +%65 = OpAccessChain %51 %61 %64 +OpStore %65 %63 +%66 = OpINotEqual %8 %63 %64 +OpSelectionMerge %68 None +OpBranchConditional %66 %67 %68 +%67 = OpLabel +%69 = OpRayQueryGetIntersectionInstanceCustomIndexKHR %6 %59 %62 +%70 = OpRayQueryGetIntersectionInstanceIdKHR %6 %59 %62 +%71 = OpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR %6 %59 %62 +%72 = OpRayQueryGetIntersectionGeometryIndexKHR %6 %59 %62 +%73 = 
OpRayQueryGetIntersectionPrimitiveIndexKHR %6 %59 %62 +%74 = OpRayQueryGetIntersectionObjectToWorldKHR %9 %59 %62 +%75 = OpRayQueryGetIntersectionWorldToObjectKHR %9 %59 %62 +%77 = OpAccessChain %51 %61 %76 +OpStore %77 %69 +%79 = OpAccessChain %51 %61 %78 +OpStore %79 %70 +%80 = OpAccessChain %51 %61 %27 +OpStore %80 %71 +%82 = OpAccessChain %51 %61 %81 +OpStore %82 %72 +%84 = OpAccessChain %51 %61 %83 +OpStore %84 %73 +%86 = OpAccessChain %52 %61 %85 +OpStore %86 %74 +%88 = OpAccessChain %52 %61 %87 +OpStore %88 %75 +%89 = OpIEqual %8 %63 %62 +%92 = OpRayQueryGetIntersectionTKHR %3 %59 %62 +%93 = OpAccessChain %55 %61 %62 +OpStore %93 %92 +OpSelectionMerge %91 None +OpBranchConditional %66 %90 %91 +%90 = OpLabel +%94 = OpRayQueryGetIntersectionBarycentricsKHR %7 %59 %62 +%95 = OpRayQueryGetIntersectionFrontFaceKHR %8 %59 %62 +%97 = OpAccessChain %53 %61 %96 +OpStore %97 %94 +%99 = OpAccessChain %54 %61 %98 +OpStore %99 %95 +OpBranch %91 +%91 = OpLabel +OpBranch %68 +%68 = OpLabel +%100 = OpLoad %10 %61 +OpReturnValue %100 +OpFunctionEnd %25 = OpFunction %10 None %26 %21 = OpFunctionParameter %4 %22 = OpFunctionParameter %4 @@ -114,90 +181,66 @@ OpBranch %44 %44 = OpLabel OpBranch %41 %42 = OpLabel -%51 = OpRayQueryGetIntersectionTypeKHR %6 %31 %50 -%52 = OpRayQueryGetIntersectionInstanceCustomIndexKHR %6 %31 %50 -%53 = OpRayQueryGetIntersectionInstanceIdKHR %6 %31 %50 -%54 = OpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR %6 %31 %50 -%55 = OpRayQueryGetIntersectionGeometryIndexKHR %6 %31 %50 -%56 = OpRayQueryGetIntersectionPrimitiveIndexKHR %6 %31 %50 -%57 = OpRayQueryGetIntersectionTKHR %3 %31 %50 -%58 = OpRayQueryGetIntersectionBarycentricsKHR %7 %31 %50 -%59 = OpRayQueryGetIntersectionFrontFaceKHR %8 %31 %50 -%60 = OpRayQueryGetIntersectionObjectToWorldKHR %9 %31 %50 -%61 = OpRayQueryGetIntersectionWorldToObjectKHR %9 %31 %50 -%62 = OpCompositeConstruct %10 %51 %57 %52 %53 %54 %55 %56 %58 %59 %60 %61 -OpReturnValue %62 +%101 = OpFunctionCall %10 %57 %31 +OpReturnValue %101 OpFunctionEnd -%66 = OpFunction %4 None %67 -%64 = OpFunctionParameter %4 -%65 = OpFunctionParameter %10 -%63 = OpLabel -OpBranch %71 -%71 = OpLabel -%72 = OpCompositeExtract %9 %65 10 -%73 = OpCompositeConstruct %14 %64 %68 -%74 = OpMatrixTimesVector %4 %72 %73 -%75 = OpVectorShuffle %7 %74 %74 0 1 -%76 = OpExtInst %7 %1 Normalize %75 -%77 = OpVectorTimesScalar %7 %76 %69 -%78 = OpCompositeExtract %9 %65 9 -%79 = OpCompositeConstruct %14 %77 %70 %68 -%80 = OpMatrixTimesVector %4 %78 %79 -%81 = OpFSub %4 %64 %80 -%82 = OpExtInst %4 %1 Normalize %81 -OpReturnValue %82 +%105 = OpFunction %4 None %106 +%103 = OpFunctionParameter %4 +%104 = OpFunctionParameter %10 +%102 = OpLabel +OpBranch %110 +%110 = OpLabel +%111 = OpCompositeExtract %9 %104 10 +%112 = OpCompositeConstruct %14 %103 %107 +%113 = OpMatrixTimesVector %4 %111 %112 +%114 = OpVectorShuffle %7 %113 %113 0 1 +%115 = OpExtInst %7 %1 Normalize %114 +%116 = OpVectorTimesScalar %7 %115 %108 +%117 = OpCompositeExtract %9 %104 9 +%118 = OpCompositeConstruct %14 %116 %109 %107 +%119 = OpMatrixTimesVector %4 %117 %118 +%120 = OpFSub %4 %103 %119 +%121 = OpExtInst %4 %1 Normalize %120 +OpReturnValue %121 OpFunctionEnd -%84 = OpFunction %2 None %85 -%83 = OpLabel -%86 = OpLoad %5 %15 -%89 = OpAccessChain %87 %17 %88 -OpBranch %92 -%92 = OpLabel -%93 = OpFunctionCall %10 %25 %90 %91 %15 -%95 = OpCompositeExtract %6 %93 0 -%96 = OpIEqual %8 %95 %88 -%97 = OpSelect %6 %96 %50 %88 -%98 = OpAccessChain %94 %89 %88 -OpStore %98 %97 -%100 = 
OpCompositeExtract %3 %93 1 -%101 = OpVectorTimesScalar %4 %91 %100 -%102 = OpFunctionCall %4 %66 %101 %93 -%103 = OpAccessChain %99 %89 %50 -OpStore %103 %102 +%123 = OpFunction %2 None %124 +%122 = OpLabel +%125 = OpLoad %5 %15 +%127 = OpAccessChain %126 %17 %64 +OpBranch %130 +%130 = OpLabel +%131 = OpFunctionCall %10 %25 %128 %129 %15 +%133 = OpCompositeExtract %6 %131 0 +%134 = OpIEqual %8 %133 %64 +%135 = OpSelect %6 %134 %62 %64 +%136 = OpAccessChain %132 %127 %64 +OpStore %136 %135 +%138 = OpCompositeExtract %3 %131 1 +%139 = OpVectorTimesScalar %4 %129 %138 +%140 = OpFunctionCall %4 %105 %139 %131 +%141 = OpAccessChain %137 %127 %62 +OpStore %141 %140 OpReturn OpFunctionEnd -%105 = OpFunction %2 None %85 -%104 = OpLabel -%110 = OpVariable %32 Function -%106 = OpLoad %5 %15 -%107 = OpAccessChain %87 %17 %88 -OpBranch %111 -%111 = OpLabel -%112 = OpCompositeExtract %6 %108 0 -%113 = OpCompositeExtract %6 %108 1 -%114 = OpCompositeExtract %3 %108 2 -%115 = OpCompositeExtract %3 %108 3 -%116 = OpCompositeExtract %4 %108 4 -%117 = OpCompositeExtract %4 %108 5 -OpRayQueryInitializeKHR %110 %106 %112 %113 %116 %114 %117 %115 -%118 = OpRayQueryGetIntersectionTypeKHR %6 %110 %88 -%119 = OpIEqual %8 %118 %88 -%120 = OpSelect %6 %119 %50 %109 -%121 = OpRayQueryGetIntersectionInstanceCustomIndexKHR %6 %110 %88 -%122 = OpRayQueryGetIntersectionInstanceIdKHR %6 %110 %88 -%123 = OpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR %6 %110 %88 -%124 = OpRayQueryGetIntersectionGeometryIndexKHR %6 %110 %88 -%125 = OpRayQueryGetIntersectionPrimitiveIndexKHR %6 %110 %88 -%126 = OpRayQueryGetIntersectionTKHR %3 %110 %88 -%127 = OpRayQueryGetIntersectionBarycentricsKHR %7 %110 %88 -%128 = OpRayQueryGetIntersectionFrontFaceKHR %8 %110 %88 -%129 = OpRayQueryGetIntersectionObjectToWorldKHR %9 %110 %88 -%130 = OpRayQueryGetIntersectionWorldToObjectKHR %9 %110 %88 -%131 = OpCompositeConstruct %10 %120 %126 %121 %122 %123 %124 %125 %127 %128 %129 %130 -%132 = OpCompositeExtract %6 %131 0 -%133 = OpIEqual %8 %132 %109 -%134 = OpSelect %6 %133 %50 %88 -%135 = OpAccessChain %94 %107 %88 -OpStore %135 %134 +%143 = OpFunction %2 None %124 +%142 = OpLabel +%147 = OpVariable %32 Function +%144 = OpLoad %5 %15 +%145 = OpAccessChain %126 %17 %64 +OpBranch %148 +%148 = OpLabel +%149 = OpCompositeExtract %6 %146 0 +%150 = OpCompositeExtract %6 %146 1 +%151 = OpCompositeExtract %3 %146 2 +%152 = OpCompositeExtract %3 %146 3 +%153 = OpCompositeExtract %4 %146 4 +%154 = OpCompositeExtract %4 %146 5 +OpRayQueryInitializeKHR %147 %144 %149 %150 %153 %151 %154 %152 +%155 = OpFunctionCall %10 %57 %147 +%156 = OpCompositeExtract %6 %155 0 +%157 = OpIEqual %8 %156 %78 +%158 = OpSelect %6 %157 %62 %64 +%159 = OpAccessChain %132 %145 %64 +OpStore %159 %158 OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/storage-textures.spvasm b/naga/tests/out/spv/storage-textures.spvasm new file mode 100644 index 0000000000..99a8dd477f --- /dev/null +++ b/naga/tests/out/spv/storage-textures.spvasm @@ -0,0 +1,79 @@ +; SPIR-V +; Version: 1.1 +; Generator: rspirv +; Bound: 42 +OpCapability Shader +OpCapability StorageImageExtendedFormats +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint GLCompute %20 "csLoad" +OpEntryPoint GLCompute %35 "csStore" +OpExecutionMode %20 LocalSize 1 1 1 +OpExecutionMode %35 LocalSize 1 1 1 +OpDecorate %7 NonWritable +OpDecorate %7 DescriptorSet 0 +OpDecorate %7 Binding 0 +OpDecorate %9 NonWritable +OpDecorate %9 DescriptorSet 0 +OpDecorate 
%9 Binding 1 +OpDecorate %11 NonWritable +OpDecorate %11 DescriptorSet 0 +OpDecorate %11 Binding 2 +OpDecorate %13 NonReadable +OpDecorate %13 DescriptorSet 1 +OpDecorate %13 Binding 0 +OpDecorate %15 NonReadable +OpDecorate %15 DescriptorSet 1 +OpDecorate %15 Binding 1 +OpDecorate %17 NonReadable +OpDecorate %17 DescriptorSet 1 +OpDecorate %17 Binding 2 +%2 = OpTypeVoid +%4 = OpTypeFloat 32 +%3 = OpTypeImage %4 2D 0 0 0 2 R32f +%5 = OpTypeImage %4 2D 0 0 0 2 Rg32f +%6 = OpTypeImage %4 2D 0 0 0 2 Rgba32f +%8 = OpTypePointer UniformConstant %3 +%7 = OpVariable %8 UniformConstant +%10 = OpTypePointer UniformConstant %5 +%9 = OpVariable %10 UniformConstant +%12 = OpTypePointer UniformConstant %6 +%11 = OpVariable %12 UniformConstant +%14 = OpTypePointer UniformConstant %3 +%13 = OpVariable %14 UniformConstant +%16 = OpTypePointer UniformConstant %5 +%15 = OpVariable %16 UniformConstant +%18 = OpTypePointer UniformConstant %6 +%17 = OpVariable %18 UniformConstant +%21 = OpTypeFunction %2 +%25 = OpTypeInt 32 0 +%26 = OpConstant %25 0 +%27 = OpTypeVector %25 2 +%28 = OpConstantComposite %27 %26 %26 +%30 = OpTypeVector %4 4 +%39 = OpConstant %4 0.0 +%40 = OpConstantComposite %30 %39 %39 %39 %39 +%20 = OpFunction %2 None %21 +%19 = OpLabel +%22 = OpLoad %3 %7 +%23 = OpLoad %5 %9 +%24 = OpLoad %6 %11 +OpBranch %29 +%29 = OpLabel +%31 = OpImageRead %30 %22 %28 +%32 = OpImageRead %30 %23 %28 +%33 = OpImageRead %30 %24 %28 +OpReturn +OpFunctionEnd +%35 = OpFunction %2 None %21 +%34 = OpLabel +%36 = OpLoad %3 %13 +%37 = OpLoad %5 %15 +%38 = OpLoad %6 %17 +OpBranch %41 +%41 = OpLabel +OpImageWrite %36 %28 %40 +OpImageWrite %37 %28 %40 +OpImageWrite %38 %28 %40 +OpReturn +OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/wgsl/280-matrix-cast.frag.wgsl b/naga/tests/out/wgsl/280-matrix-cast.frag.wgsl index 6cc18567cd..6ac9d37540 100644 --- a/naga/tests/out/wgsl/280-matrix-cast.frag.wgsl +++ b/naga/tests/out/wgsl/280-matrix-cast.frag.wgsl @@ -1,6 +1,7 @@ fn main_1() { var a: mat4x4 = mat4x4(vec4(1f, 0f, 0f, 0f), vec4(0f, 1f, 0f, 0f), vec4(0f, 0f, 1f, 0f), vec4(0f, 0f, 0f, 1f)); + return; } @fragment diff --git a/naga/tests/out/wgsl/6772-unpack-expr-accesses.wgsl b/naga/tests/out/wgsl/6772-unpack-expr-accesses.wgsl index 4c13bc50f3..69f8f1d48b 100644 --- a/naga/tests/out/wgsl/6772-unpack-expr-accesses.wgsl +++ b/naga/tests/out/wgsl/6772-unpack-expr-accesses.wgsl @@ -2,4 +2,5 @@ fn main() { let phony = unpack4xI8(12u)[2i]; let phony_1 = unpack4xU8(12u).y; + return; } diff --git a/naga/tests/out/wgsl/abstract-types-operators.wgsl b/naga/tests/out/wgsl/abstract-types-operators.wgsl index caafb535ce..6b8ffceb14 100644 --- a/naga/tests/out/wgsl/abstract-types-operators.wgsl +++ b/naga/tests/out/wgsl/abstract-types-operators.wgsl @@ -88,5 +88,6 @@ fn wgpu_4445_() { fn wgpu_4435_() { let y = a[(1i - 1i)]; + return; } diff --git a/naga/tests/out/wgsl/abstract-types-return.wgsl b/naga/tests/out/wgsl/abstract-types-return.wgsl new file mode 100644 index 0000000000..c25a217dac --- /dev/null +++ b/naga/tests/out/wgsl/abstract-types-return.wgsl @@ -0,0 +1,28 @@ +fn return_i32_ai() -> i32 { + return 1i; +} + +fn return_u32_ai() -> u32 { + return 1u; +} + +fn return_f32_ai() -> f32 { + return 1f; +} + +fn return_f32_af() -> f32 { + return 1f; +} + +fn return_vec2f32_ai() -> vec2 { + return vec2(1f); +} + +fn return_arrf32_ai() -> array { + return array(1f, 1f, 1f, 1f); +} + +@compute @workgroup_size(1, 1, 1) +fn main() { + return; +} diff --git a/naga/tests/out/wgsl/access.wgsl 
b/naga/tests/out/wgsl/access.wgsl index 4d60beddb7..fd4f742522 100644 --- a/naga/tests/out/wgsl/access.wgsl +++ b/naga/tests/out/wgsl/access.wgsl @@ -29,6 +29,19 @@ struct AssignToMember { x: u32, } +struct S { + m: i32, +} + +struct Inner { + delicious: i32, +} + +struct Outer { + om_nom_nom: Inner, + thing: u32, +} + var global_const: GlobalConst = GlobalConst(0u, vec3(0u, 0u, 0u), 0i); @group(0) @binding(0) var bar: Bar; @@ -150,6 +163,46 @@ fn assign_to_arg_ptr_array_element(p_4: ptr>) { return; } +fn index_ptr(value: bool) -> bool { + var a_1: array; + + a_1 = array(value); + let _e4 = a_1[0]; + return _e4; +} + +fn member_ptr() -> i32 { + var s: S = S(42i); + + let _e4 = s.m; + return _e4; +} + +fn let_members_of_members() -> i32 { + const inner_1 = Outer().om_nom_nom; + const delishus_1 = inner_1.delicious; + if (Outer().thing != u32(delishus_1)) { + } + return Outer().om_nom_nom.delicious; +} + +fn var_members_of_members() -> i32 { + var thing: Outer = Outer(); + var inner: Inner; + var delishus: i32; + + let _e3 = thing.om_nom_nom; + inner = _e3; + let _e6 = inner.delicious; + delishus = _e6; + let _e9 = thing.thing; + let _e10 = delishus; + if (_e9 != u32(_e10)) { + } + let _e15 = thing.om_nom_nom.delicious; + return _e15; +} + @vertex fn foo_vert(@builtin(vertex_index) vi: u32) -> @builtin(position) vec4 { var foo: f32 = 0f; @@ -162,15 +215,15 @@ fn foo_vert(@builtin(vertex_index) vi: u32) -> @builtin(position) vec4 { let _matrix = bar._matrix; let arr_1 = bar.arr; let b = bar._matrix[3u][0]; - let a_1 = bar.data[(arrayLength((&bar.data)) - 2u)].value; + let a_2 = bar.data[(arrayLength((&bar.data)) - 2u)].value; let c = qux; let data_pointer = (&bar.data[0].value); let _e33 = read_from_private((&foo)); - c2_ = array(a_1, i32(b), 3i, 4i, 5i); + c2_ = array(a_2, i32(b), 3i, 4i, 5i); c2_[(vi + 1u)] = 42i; - let value = c2_[vi]; + let value_1 = c2_[vi]; let _e47 = test_arr_as_arg(array, 5>()); - return vec4((_matrix * vec4(vec4(value))), 2f); + return vec4((_matrix * vec4(vec4(value_1))), 2f); } @fragment diff --git a/naga/tests/out/wgsl/array-in-ctor.wgsl b/naga/tests/out/wgsl/array-in-ctor.wgsl index 8c17731f0c..4cce8641b2 100644 --- a/naga/tests/out/wgsl/array-in-ctor.wgsl +++ b/naga/tests/out/wgsl/array-in-ctor.wgsl @@ -8,4 +8,5 @@ var ah: Ah; @compute @workgroup_size(1, 1, 1) fn cs_main() { let ah_1 = ah; + return; } diff --git a/naga/tests/out/wgsl/array-in-function-return-type.wgsl b/naga/tests/out/wgsl/array-in-function-return-type.wgsl index 2beacd3ff4..a39680093c 100644 --- a/naga/tests/out/wgsl/array-in-function-return-type.wgsl +++ b/naga/tests/out/wgsl/array-in-function-return-type.wgsl @@ -2,8 +2,15 @@ fn ret_array() -> array { return array(1f, 2f); } +fn ret_array_array() -> array, 3> { + let _e0 = ret_array(); + let _e1 = ret_array(); + let _e2 = ret_array(); + return array, 3>(_e0, _e1, _e2); +} + @fragment fn main() -> @location(0) vec4 { - let _e0 = ret_array(); - return vec4(_e0[0], _e0[1], 0f, 1f); + let _e0 = ret_array_array(); + return vec4(_e0[0][0], _e0[0][1], 0f, 1f); } diff --git a/naga/tests/out/wgsl/const-exprs.wgsl b/naga/tests/out/wgsl/const-exprs.wgsl index 20a43e2044..411e835b5d 100644 --- a/naga/tests/out/wgsl/const-exprs.wgsl +++ b/naga/tests/out/wgsl/const-exprs.wgsl @@ -16,16 +16,19 @@ const compare_vec: vec2 = vec2(true, false); fn swizzle_of_compose() { var out: vec4 = vec4(4i, 3i, 2i, 1i); + return; } fn index_of_compose() { var out_1: i32 = 2i; + return; } fn compose_three_deep() { var out_2: i32 = 6i; + return; } fn 
non_constant_initializers() { @@ -50,16 +53,19 @@ fn non_constant_initializers() { fn splat_of_constant() { var out_4: vec4 = vec4(-4i, -4i, -4i, -4i); + return; } fn compose_of_constant() { var out_5: vec4 = vec4(-4i, -4i, -4i, -4i); + return; } fn compose_of_splat() { var x_1: vec4 = vec4(2f, 1f, 1f, 1f); + return; } fn map_texture_kind(texture_kind: i32) -> u32 { diff --git a/naga/tests/out/wgsl/constructors.wgsl b/naga/tests/out/wgsl/constructors.wgsl index 6d9d7e2f5d..622903d7eb 100644 --- a/naga/tests/out/wgsl/constructors.wgsl +++ b/naga/tests/out/wgsl/constructors.wgsl @@ -28,4 +28,5 @@ fn main() { const cit2_ = array(0i, 1i, 2i, 3i); const ic4_ = vec2(0u, 0u); const ic5_ = mat2x3(vec3(0f, 0f, 0f), vec3(0f, 0f, 0f)); + return; } diff --git a/naga/tests/out/wgsl/cross.wgsl b/naga/tests/out/wgsl/cross.wgsl index 2e213aa9c4..c9ed52f5af 100644 --- a/naga/tests/out/wgsl/cross.wgsl +++ b/naga/tests/out/wgsl/cross.wgsl @@ -1,4 +1,5 @@ @compute @workgroup_size(1, 1, 1) fn main() { let a = cross(vec3(0f, 1f, 2f), vec3(0f, 1f, 2f)); + return; } diff --git a/naga/tests/out/wgsl/expressions.frag.wgsl b/naga/tests/out/wgsl/expressions.frag.wgsl index ec53847d5f..aec2b41c8b 100644 --- a/naga/tests/out/wgsl/expressions.frag.wgsl +++ b/naga/tests/out/wgsl/expressions.frag.wgsl @@ -282,16 +282,19 @@ fn testUnaryOpMat(a_16: mat3x3) { fn testStructConstructor() { var tree: BST = BST(1i); + return; } fn testNonScalarToScalarConstructor() { var f: f32 = 1f; + return; } fn testArrayConstructor() { var tree_1: array = array(0f); + return; } fn testFreestandingConstructor() { @@ -389,6 +392,7 @@ fn testConstantLength(a_24: array) { var len_1: i32 = 4i; a_25 = a_24; + return; } fn indexConstantNonConstantIndex(i: i32) { diff --git a/naga/tests/out/wgsl/global-constant-array.frag.wgsl b/naga/tests/out/wgsl/global-constant-array.frag.wgsl index bdb509dc35..0370f9b16a 100644 --- a/naga/tests/out/wgsl/global-constant-array.frag.wgsl +++ b/naga/tests/out/wgsl/global-constant-array.frag.wgsl @@ -6,6 +6,7 @@ fn main_1() { var local: array = array_; let _e2 = i; + return; } @fragment diff --git a/naga/tests/out/wgsl/globals.wgsl b/naga/tests/out/wgsl/globals.wgsl index 229d40ccf6..d9e2bdb2cd 100644 --- a/naga/tests/out/wgsl/globals.wgsl +++ b/naga/tests/out/wgsl/globals.wgsl @@ -42,6 +42,7 @@ fn test_msl_packed_vec3_() { let mvm1_ = (mat3x3() * data.v3_); let svm0_ = (data.v3_ * 2f); let svm1_ = (2f * data.v3_); + return; } @compute @workgroup_size(1, 1, 1) diff --git a/naga/tests/out/wgsl/lexical-scopes.wgsl b/naga/tests/out/wgsl/lexical-scopes.wgsl index e787f96b10..f14fc1a8c2 100644 --- a/naga/tests/out/wgsl/lexical-scopes.wgsl +++ b/naga/tests/out/wgsl/lexical-scopes.wgsl @@ -61,5 +61,6 @@ fn switchLexicalScope(a_6: i32) { } } let test = (a_6 == 2i); + return; } diff --git a/naga/tests/out/wgsl/local-const.wgsl b/naga/tests/out/wgsl/local-const.wgsl index 587f5a8e54..06e52a9277 100644 --- a/naga/tests/out/wgsl/local-const.wgsl +++ b/naga/tests/out/wgsl/local-const.wgsl @@ -7,5 +7,6 @@ const gf: f32 = 2f; fn const_in_fn() { const e = vec3(4i, 4i, 4i); + return; } diff --git a/naga/tests/out/wgsl/long-form-matrix.frag.wgsl b/naga/tests/out/wgsl/long-form-matrix.frag.wgsl index c69439159a..68ccff91aa 100644 --- a/naga/tests/out/wgsl/long-form-matrix.frag.wgsl +++ b/naga/tests/out/wgsl/long-form-matrix.frag.wgsl @@ -8,6 +8,7 @@ fn main_1() { var d: mat3x3 = mat3x3(vec3(2f, 2f, 1f), vec3(1f, 1f, 1f), vec3(1f, 1f, 1f)); var e: mat4x4 = mat4x4(vec4(2f, 2f, 1f, 1f), vec4(1f, 1f, 2f, 2f), vec4(1f, 1f, 1f, 1f), vec4(1f, 
1f, 1f, 1f)); + return; } @fragment diff --git a/naga/tests/out/wgsl/math-functions.wgsl b/naga/tests/out/wgsl/math-functions.wgsl index f48a5dd025..b4d876a7f6 100644 --- a/naga/tests/out/wgsl/math-functions.wgsl +++ b/naga/tests/out/wgsl/math-functions.wgsl @@ -36,4 +36,5 @@ fn main() { let quantizeToF16_b = quantizeToF16(vec2(1f, 1f)); let quantizeToF16_c = quantizeToF16(vec3(1f, 1f, 1f)); let quantizeToF16_d = quantizeToF16(vec4(1f, 1f, 1f, 1f)); + return; } diff --git a/naga/tests/out/wgsl/module-scope.wgsl b/naga/tests/out/wgsl/module-scope.wgsl index 48e3325e53..c9b0496a64 100644 --- a/naga/tests/out/wgsl/module-scope.wgsl +++ b/naga/tests/out/wgsl/module-scope.wgsl @@ -21,5 +21,6 @@ fn call() { statement(); let _e0 = returns(); let s = textureSample(Texture, Sampler, vec2(1f)); + return; } diff --git a/naga/tests/out/wgsl/operators.wgsl b/naga/tests/out/wgsl/operators.wgsl index 2194a01df1..4a6f1c8ab4 100644 --- a/naga/tests/out/wgsl/operators.wgsl +++ b/naga/tests/out/wgsl/operators.wgsl @@ -48,6 +48,7 @@ fn logical() { let bitwise_or1_ = (vec3(true) | vec3(false)); let bitwise_and0_ = (true & false); let bitwise_and1_ = (vec4(true) & vec4(false)); + return; } fn arithmetic() { @@ -123,6 +124,7 @@ fn arithmetic() { let mul_vector0_ = (mat4x3() * vec4(1f)); let mul_vector1_ = (vec3(2f) * mat4x3()); let mul = (mat4x3() * mat3x4()); + return; } fn bit() { @@ -150,6 +152,7 @@ fn bit() { let shr1_ = (2u >> 1u); let shr2_ = (vec2(2i) >> vec2(1u)); let shr3_ = (vec3(2u) >> vec3(1u)); + return; } fn comparison() { @@ -189,6 +192,7 @@ fn comparison() { let gte3_ = (vec2(2i) >= vec2(1i)); let gte4_ = (vec3(2u) >= vec3(1u)); let gte5_ = (vec4(2f) >= vec4(1f)); + return; } fn assignment() { @@ -238,6 +242,7 @@ fn negation_avoids_prefix_decrement() { const p5_ = -(-(-(-(1i)))); const p6_ = -(-(-(-(-(1i))))); const p7_ = -(-(-(-(-(1i))))); + return; } @compute @workgroup_size(1, 1, 1) diff --git a/naga/tests/out/wgsl/phony_assignment.wgsl b/naga/tests/out/wgsl/phony_assignment.wgsl index 60987a15cb..8e28b74314 100644 --- a/naga/tests/out/wgsl/phony_assignment.wgsl +++ b/naga/tests/out/wgsl/phony_assignment.wgsl @@ -12,4 +12,5 @@ fn main(@builtin(global_invocation_id) id: vec3) { let _e6 = five(); let _e7 = five(); let phony_2 = binding; + return; } diff --git a/naga/tests/out/wgsl/storage-textures.wgsl b/naga/tests/out/wgsl/storage-textures.wgsl new file mode 100644 index 0000000000..585df8baff --- /dev/null +++ b/naga/tests/out/wgsl/storage-textures.wgsl @@ -0,0 +1,13 @@ +@group(0) @binding(0) +var s_r: texture_storage_2d; +@group(0) @binding(1) +var s_rg: texture_storage_2d; +@group(0) @binding(2) +var s_rgba: texture_storage_2d; + +@compute @workgroup_size(1, 1, 1) +fn csWithStorageUsage() { + let phony = textureLoad(s_r, vec2(0u)); + let phony_1 = textureLoad(s_rg, vec2(0u)); + let phony_2 = textureLoad(s_rgba, vec2(0u)); +} diff --git a/naga/tests/out/wgsl/type-alias.wgsl b/naga/tests/out/wgsl/type-alias.wgsl index 13bfcba82c..ffb8dcbb08 100644 --- a/naga/tests/out/wgsl/type-alias.wgsl +++ b/naga/tests/out/wgsl/type-alias.wgsl @@ -6,5 +6,6 @@ fn main() { const e = vec3(d); const f = mat2x2(vec2(1f, 2f), vec2(3f, 4f)); const g = mat3x3(a, a, a); + return; } diff --git a/naga/tests/snapshots.rs b/naga/tests/snapshots.rs index 691878959d..4f98259c35 100644 --- a/naga/tests/snapshots.rs +++ b/naga/tests/snapshots.rs @@ -239,8 +239,13 @@ impl Input { let mut param_path = self.input_path(); param_path.set_extension("param.ron"); match fs::read_to_string(¶m_path) { - Ok(string) => 
ron::de::from_str(&string) - .unwrap_or_else(|_| panic!("Couldn't parse param file: {}", param_path.display())), + Ok(string) => match ron::de::from_str(&string) { + Ok(params) => params, + Err(e) => panic!( + "Couldn't parse param file: {} due to: {e}", + param_path.display() + ), + }, Err(_) => Parameters::default(), } } @@ -676,14 +681,13 @@ fn convert_wgsl() { let _ = env_logger::try_init(); let inputs = [ - // TODO: merge array-in-ctor and array-in-function-return-type tests after fix HLSL issue https://github.com/gfx-rs/naga/issues/1930 ( "array-in-ctor", Targets::SPIRV | Targets::METAL | Targets::GLSL | Targets::HLSL | Targets::WGSL, ), ( "array-in-function-return-type", - Targets::SPIRV | Targets::METAL | Targets::GLSL | Targets::WGSL, + Targets::SPIRV | Targets::METAL | Targets::GLSL | Targets::HLSL | Targets::WGSL, ), ( "empty", @@ -834,6 +838,7 @@ fn convert_wgsl() { Targets::SPIRV | Targets::METAL | Targets::GLSL, ), ("policy-mix", Targets::SPIRV | Targets::METAL), + ("bounds-check-dynamic-buffer", Targets::HLSL), ( "texture-arg", Targets::SPIRV | Targets::METAL | Targets::GLSL | Targets::HLSL | Targets::WGSL, @@ -916,6 +921,10 @@ fn convert_wgsl() { "abstract-types-operators", Targets::SPIRV | Targets::METAL | Targets::GLSL | Targets::WGSL, ), + ( + "abstract-types-return", + Targets::SPIRV | Targets::METAL | Targets::GLSL | Targets::HLSL | Targets::WGSL, + ), ( "int64", Targets::SPIRV | Targets::HLSL | Targets::WGSL | Targets::METAL, @@ -961,6 +970,11 @@ fn convert_wgsl() { "6772-unpack-expr-accesses", Targets::SPIRV | Targets::METAL | Targets::GLSL | Targets::HLSL | Targets::WGSL, ), + ("must-use", Targets::IR), + ( + "storage-textures", + Targets::IR | Targets::ANALYSIS | Targets::SPIRV | Targets::METAL | Targets::HLSL, + ), ]; for &(name, targets) in inputs.iter() { diff --git a/naga/tests/wgsl_errors.rs b/naga/tests/wgsl_errors.rs index fc4d7211f8..5abd61cd8f 100644 --- a/naga/tests/wgsl_errors.rs +++ b/naga/tests/wgsl_errors.rs @@ -580,44 +580,6 @@ fn local_var_missing_type() { ); } -#[test] -fn postfix_pointers() { - check( - r#" - fn main() { - var v: vec4 = vec4(1.0, 1.0, 1.0, 1.0); - let pv = &v; - let a = *pv[3]; // Problematic line - } - "#, - r#"error: the value indexed by a `[]` subscripting expression must not be a pointer - ┌─ wgsl:5:26 - │ -5 │ let a = *pv[3]; // Problematic line - │ ^^ expression is a pointer - -"#, - ); - - check( - r#" - struct S { m: i32 }; - fn main() { - var s: S = S(42); - let ps = &s; - let a = *ps.m; // Problematic line - } - "#, - r#"error: the value accessed by a `.member` expression must not be a pointer - ┌─ wgsl:6:26 - │ -6 │ let a = *ps.m; // Problematic line - │ ^^ expression is a pointer - -"#, - ); -} - #[test] fn reserved_keyword() { // global var @@ -1212,7 +1174,7 @@ fn invalid_functions() { if function_name == "return_pointer" } - check_validation! { + check( " @group(0) @binding(0) var atom: atomic; @@ -1220,14 +1182,15 @@ fn invalid_functions() { fn return_atomic() -> atomic { return atom; } - ": - Err(naga::valid::ValidationError::Function { - name: function_name, - source: naga::valid::FunctionError::NonConstructibleReturnType, - .. 
- }) - if function_name == "return_atomic" - } + ", + "error: automatic conversions cannot convert `u32` to `atomic` + ┌─ wgsl:6:19 + │ +6 │ return atom; + │ ^^^^ this expression has type u32 + +", + ); } #[test] @@ -2003,6 +1966,94 @@ fn function_returns_void() { ) } +#[test] +fn function_must_use_unused() { + check( + r#" +@must_use +fn use_me(a: i32) -> i32 { + return 10; +} + +fn useless() -> i32 { + use_me(1); + return 0; +} +"#, + r#"error: unused return value from function annotated with @must_use + ┌─ wgsl:8:3 + │ +8 │ use_me(1); + │ ^^^^^^ + │ + = note: function 'use_me' is declared with `@must_use` attribute + = note: use a phony assignment or declare a value using the function call as the initializer + +"#, + ); +} + +#[test] +fn function_must_use_returns_void() { + check( + r#" +@must_use +fn use_me(a: i32) { + let x = a; +} +"#, + r#"error: function annotated with @must_use but does not return any value + ┌─ wgsl:2:2 + │ +2 │ @must_use + │ ^^^^^^^^ +3 │ fn use_me(a: i32) { + │ ^^^^^^^^^^^^^ + │ + = note: declare a return type or remove the attribute + +"#, + ); +} + +#[test] +fn function_must_use_repeated() { + check( + r#" +@must_use +@must_use +fn use_me(a: i32) -> i32 { + return 10; +} +"#, + r#"error: repeated attribute: `must_use` + ┌─ wgsl:3:2 + │ +3 │ @must_use + │ ^^^^^^^^ repeated attribute + +"#, + ); +} + +#[test] +fn struct_member_must_use() { + check( + r#" +struct S { + @must_use a: i32, +} +"#, + r#"error: unknown attribute: `must_use` + ┌─ wgsl:3:4 + │ +3 │ @must_use a: i32, + │ ^^^^^^^^ unknown attribute + +"#, + ) +} + #[test] fn function_param_redefinition_as_param() { check( @@ -2041,6 +2092,27 @@ fn function_param_redefinition_as_local() { ) } +#[test] +fn function_must_return_value() { + check_validation!( + "fn func() -> i32 { + }": + Err(naga::valid::ValidationError::Function { + source: naga::valid::FunctionError::InvalidReturnType(_), + .. + }) + ); + check_validation!( + "fn func(x: i32) -> i32 { + let y = x + 10; + }": + Err(naga::valid::ValidationError::Function { + source: naga::valid::FunctionError::InvalidReturnType(_), + .. 
+ }) + ); +} + #[test] fn constructor_type_error_span() { check( diff --git a/naga/xtask/Cargo.toml b/naga/xtask/Cargo.toml index 2b3f0247ac..5e779d2944 100644 --- a/naga/xtask/Cargo.toml +++ b/naga/xtask/Cargo.toml @@ -3,6 +3,7 @@ name = "xtask" version = "0.1.0" edition = "2021" publish = false +rust-version = "1.83" [dependencies] anyhow = "1" diff --git a/naga/xtask/src/main.rs b/naga/xtask/src/main.rs index aed7f48c71..76ca91383d 100644 --- a/naga/xtask/src/main.rs +++ b/naga/xtask/src/main.rs @@ -16,7 +16,6 @@ mod glob; mod jobserver; mod path; mod process; -mod result; mod validate; fn main() -> ExitCode { diff --git a/naga/xtask/src/result.rs b/naga/xtask/src/result.rs deleted file mode 100644 index e351ebf4cf..0000000000 --- a/naga/xtask/src/result.rs +++ /dev/null @@ -1,33 +0,0 @@ -#[derive(Clone, Copy, Debug)] -pub(crate) enum ErrorStatus { - NoFailuresFound, - OneOrMoreFailuresFound, -} - -impl ErrorStatus { - pub(crate) fn merge(self, other: Self) -> Self { - match (self, other) { - (Self::OneOrMoreFailuresFound, _) | (_, Self::OneOrMoreFailuresFound) => { - Self::OneOrMoreFailuresFound - } - (Self::NoFailuresFound, Self::NoFailuresFound) => Self::NoFailuresFound, - } - } -} - -pub(crate) trait LogIfError { - fn log_if_err_found(self, status: &mut ErrorStatus) -> Option; -} - -impl LogIfError for anyhow::Result { - fn log_if_err_found(self, status: &mut ErrorStatus) -> Option { - match self { - Ok(t) => Some(t), - Err(e) => { - log::error!("{e:?}"); - *status = status.merge(ErrorStatus::OneOrMoreFailuresFound); - None - } - } - } -} diff --git a/naga/xtask/src/validate.rs b/naga/xtask/src/validate.rs index 2006aa83ae..be6a900630 100644 --- a/naga/xtask/src/validate.rs +++ b/naga/xtask/src/validate.rs @@ -26,7 +26,7 @@ pub(crate) fn validate(cmd: ValidateSubcommand) -> anyhow::Result<()> { for job in jobs { let tx_results = tx_results.clone(); crate::jobserver::start_job_thread(move || { - let result = match std::panic::catch_unwind(|| job()) { + let result = match std::panic::catch_unwind(job) { Ok(result) => result, Err(payload) => Err(match payload.downcast_ref::<&str>() { Some(message) => { diff --git a/player/Cargo.toml b/player/Cargo.toml index 3aa4c3f5a8..086e2563a7 100644 --- a/player/Cargo.toml +++ b/player/Cargo.toml @@ -8,6 +8,7 @@ homepage.workspace = true repository.workspace = true keywords.workspace = true license.workspace = true +rust-version.workspace = true publish = false [lib] @@ -21,17 +22,18 @@ path = "src/bin/play.rs" test = false [dependencies] +wgpu-types = { workspace = true, features = ["serde"] } + env_logger.workspace = true log.workspace = true raw-window-handle.workspace = true ron.workspace = true winit = { workspace = true, optional = true } -[dependencies.wgt] -workspace = true -features = ["serde"] - -[target.'cfg(not(target_arch = "wasm32"))'.dependencies.wgc] +# Non-Webassembly +# +# We are a non-wasm only crate, and this allows us to compile. 
+[target.'cfg(not(target_arch = "wasm32"))'.dependencies.wgpu-core] workspace = true features = [ "replay", diff --git a/player/src/bin/play.rs b/player/src/bin/play.rs index f5bafe57e4..7c8ec3f3cf 100644 --- a/player/src/bin/play.rs +++ b/player/src/bin/play.rs @@ -2,6 +2,9 @@ #[cfg(not(target_arch = "wasm32"))] fn main() { + extern crate wgpu_core as wgc; + extern crate wgpu_types as wgt; + use player::GlobalPlay as _; use wgc::device::trace; diff --git a/player/src/lib.rs b/player/src/lib.rs index 28ae23ad7c..ccf1f5a473 100644 --- a/player/src/lib.rs +++ b/player/src/lib.rs @@ -3,6 +3,9 @@ #![cfg(not(target_arch = "wasm32"))] #![warn(clippy::allow_attributes, unsafe_op_in_unsafe_fn)] +extern crate wgpu_core as wgc; +extern crate wgpu_types as wgt; + use wgc::device::trace; use std::{borrow::Cow, fs, path::Path}; @@ -191,7 +194,7 @@ impl GlobalPlay for wgc::global::Global { .map(|instance| wgc::ray_tracing::TlasInstance { blas_id: instance.blas_id, transform: &instance.transform, - custom_index: instance.custom_index, + custom_data: instance.custom_data, mask: instance.mask, }) }); diff --git a/player/tests/test.rs b/player/tests/test.rs index d50f21dd55..7276e6c292 100644 --- a/player/tests/test.rs +++ b/player/tests/test.rs @@ -10,6 +10,9 @@ #![cfg(not(target_arch = "wasm32"))] +extern crate wgpu_core as wgc; +extern crate wgpu_types as wgt; + use player::GlobalPlay; use std::{ fs::{read_to_string, File}, diff --git a/tests/Cargo.toml b/tests/Cargo.toml index 214aec9f04..10a0555aa5 100644 --- a/tests/Cargo.toml +++ b/tests/Cargo.toml @@ -8,6 +8,7 @@ homepage.workspace = true repository.workspace = true keywords.workspace = true license.workspace = true +rust-version.workspace = true autotests = false publish = false @@ -25,8 +26,12 @@ harness = true webgl = ["wgpu/webgl"] [dependencies] +wgpu.workspace = true +wgpu-macros.workspace = true + anyhow.workspace = true arrayvec.workspace = true +approx.workspace = true bitflags.workspace = true bytemuck.workspace = true cfg-if.workspace = true @@ -45,20 +50,20 @@ serde_json.workspace = true serde.workspace = true strum = { workspace = true, features = ["derive"] } trybuild.workspace = true -wgpu.workspace = true -wgpu-macros.workspace = true -wgt = { workspace = true, features = ["serde"] } +# Non-Webassembly [target.'cfg(not(target_arch = "wasm32"))'.dependencies] env_logger.workspace = true nv-flip.workspace = true parking_lot = { workspace = true, features = ["deadlock_detection"] } +# Webassembly [target.'cfg(target_arch = "wasm32")'.dependencies] console_log.workspace = true wasm-bindgen.workspace = true web-sys = { workspace = true } +# Webassembly Dev Dependencies [target.'cfg(target_arch = "wasm32")'.dev-dependencies] image.workspace = true js-sys.workspace = true @@ -66,3 +71,6 @@ wasm-bindgen-futures.workspace = true wasm-bindgen-test.workspace = true wasm-bindgen.workspace = true web-sys = { workspace = true, features = ["CanvasRenderingContext2d", "Blob"] } + +[lints.clippy] +disallowed_types = "allow" diff --git a/tests/src/expectations.rs b/tests/src/expectations.rs index a3c90eac0b..b6e0beaeff 100644 --- a/tests/src/expectations.rs +++ b/tests/src/expectations.rs @@ -35,7 +35,7 @@ use core::fmt; /// /// [skip]: super::TestParameters::skip /// [expect_fail]: super::TestParameters::expect_fail -/// [`AdapterInfo`]: wgt::AdapterInfo +/// [`AdapterInfo`]: wgpu::AdapterInfo #[derive(Default, Clone)] pub struct FailureCase { /// Backends expected to fail, or `None` for any backend. 
@@ -50,7 +50,7 @@ pub struct FailureCase { /// usually the PCI device id. Otherwise, this `FailureCase` /// applies regardless of vendor. /// - /// [`AdapterInfo::device`]: wgt::AdapterInfo::device + /// [`AdapterInfo::device`]: wgpu::AdapterInfo::device pub vendor: Option, /// Name of adapter expected to fail, or `None` for any adapter name. @@ -59,7 +59,7 @@ pub struct FailureCase { /// [`AdapterInfo::name`], then this `FailureCase` applies. If /// this is `None`, the adapter name isn't considered. /// - /// [`AdapterInfo::name`]: wgt::AdapterInfo::name + /// [`AdapterInfo::name`]: wgpu::AdapterInfo::name pub adapter: Option<&'static str>, /// Name of driver expected to fail, or `None` for any driver name. @@ -68,7 +68,7 @@ pub struct FailureCase { /// [`AdapterInfo::driver`], then this `FailureCase` applies. If /// this is `None`, the driver name isn't considered. /// - /// [`AdapterInfo::driver`]: wgt::AdapterInfo::driver + /// [`AdapterInfo::driver`]: wgpu::AdapterInfo::driver pub driver: Option<&'static str>, /// Reason why the test is expected to fail. @@ -115,7 +115,7 @@ impl FailureCase { /// of the adapter's [`AdapterInfo::name`]. The comparison is /// case-insensitive. /// - /// [`AdapterInfo::name`]: wgt::AdapterInfo::name + /// [`AdapterInfo::name`]: wgpu::AdapterInfo::name pub fn adapter(adapter: &'static str) -> Self { FailureCase { adapter: Some(adapter), @@ -130,7 +130,7 @@ impl FailureCase { /// the adapter's [`AdapterInfo::name`]. The string comparison is /// case-insensitive. /// - /// [`AdapterInfo::name`]: wgt::AdapterInfo::name + /// [`AdapterInfo::name`]: wgpu::AdapterInfo::name pub fn backend_adapter(backends: wgpu::Backends, adapter: &'static str) -> Self { FailureCase { backends: Some(backends), @@ -207,7 +207,7 @@ impl FailureCase { /// matching. 
pub(crate) fn applies_to_adapter( &self, - info: &wgt::AdapterInfo, + info: &wgpu::AdapterInfo, ) -> Option { let mut reasons = FailureApplicationReasons::empty(); diff --git a/tests/src/image.rs b/tests/src/image.rs index 7a639b1f80..df74af309d 100644 --- a/tests/src/image.rs +++ b/tests/src/image.rs @@ -151,7 +151,7 @@ impl ComparisonType { #[cfg(not(target_arch = "wasm32"))] pub async fn compare_image_output( path: impl AsRef + AsRef, - adapter_info: &wgt::AdapterInfo, + adapter_info: &wgpu::AdapterInfo, width: u32, height: u32, test_with_alpha: &[u8], @@ -253,7 +253,7 @@ pub async fn compare_image_output( #[cfg(target_arch = "wasm32")] pub async fn compare_image_output( path: impl AsRef + AsRef, - adapter_info: &wgt::AdapterInfo, + adapter_info: &wgpu::AdapterInfo, width: u32, height: u32, test_with_alpha: &[u8], diff --git a/tests/src/init.rs b/tests/src/init.rs index 67b33c0f19..60639f67a6 100644 --- a/tests/src/init.rs +++ b/tests/src/init.rs @@ -1,5 +1,4 @@ -use wgpu::{Adapter, Device, Instance, Queue}; -use wgt::{Backends, Features, Limits}; +use wgpu::{Adapter, Backends, Device, Features, Instance, Limits, Queue}; use crate::report::AdapterReport; @@ -36,7 +35,6 @@ pub fn initialize_instance(backends: wgpu::Backends, force_fxc: bool) -> Instanc } else { wgpu::Dx12Compiler::from_env().unwrap_or(wgpu::Dx12Compiler::StaticDxc) }; - let gles_minor_version = wgpu::Gles3MinorVersion::from_env().unwrap_or_default(); Instance::new(&wgpu::InstanceDescriptor { backends, flags: wgpu::InstanceFlags::debugging().with_env(), @@ -44,7 +42,7 @@ pub fn initialize_instance(backends: wgpu::Backends, force_fxc: bool) -> Instanc dx12: wgpu::Dx12BackendOptions { shader_compiler: dx12_shader_compiler, }, - gl: wgpu::GlBackendOptions { gles_minor_version }, + gl: wgpu::GlBackendOptions::from_env_or_default(), }, }) } diff --git a/tests/src/lib.rs b/tests/src/lib.rs index 89f7e91c6e..0967817a4e 100644 --- a/tests/src/lib.rs +++ b/tests/src/lib.rs @@ -42,8 +42,8 @@ pub fn fail( assert!( lowered_actual.contains(&lowered_expected), concat!( - "expected validation error case-insensitively containing {:?}, ", - "but it was not present in actual error message:\n{:?}" + "expected validation error case-insensitively containing {}, ", + "but it was not present in actual error message:\n{}" ), expected_msg_substring, validation_error @@ -83,7 +83,17 @@ pub fn fail_if( } } -/// Adds the necissary main function for our gpu test harness. +/// Returns true if the provided callback fails validation. +pub fn did_fail(device: &wgpu::Device, callback: impl FnOnce() -> T) -> (bool, T) { + device.push_error_scope(wgpu::ErrorFilter::Validation); + let result = callback(); + let validation_error = pollster::block_on(device.pop_error_scope()); + let failed = validation_error.is_some(); + + (failed, result) +} + +/// Adds the necessary main function for our gpu test harness. #[macro_export] macro_rules! 
gpu_test_main { () => { diff --git a/tests/src/params.rs b/tests/src/params.rs index d3b7070f3e..2f6c9f17b6 100644 --- a/tests/src/params.rs +++ b/tests/src/params.rs @@ -1,5 +1,5 @@ use arrayvec::ArrayVec; -use wgt::{DownlevelCapabilities, DownlevelFlags, Features, Limits}; +use wgpu::{DownlevelCapabilities, DownlevelFlags, Features, Limits}; use crate::{ report::AdapterReport, FailureApplicationReasons, FailureBehavior, FailureCase, @@ -7,9 +7,9 @@ use crate::{ }; const LOWEST_DOWNLEVEL_PROPERTIES: wgpu::DownlevelCapabilities = DownlevelCapabilities { - flags: wgt::DownlevelFlags::empty(), - limits: wgt::DownlevelLimits {}, - shader_model: wgt::ShaderModel::Sm2, + flags: wgpu::DownlevelFlags::empty(), + limits: wgpu::DownlevelLimits {}, + shader_model: wgpu::ShaderModel::Sm2, }; /// This information determines if a test should run. @@ -120,7 +120,7 @@ impl TestInfo { // Produce a lower-case version of the adapter info, for comparison against // `parameters.skips` and `parameters.failures`. - let adapter_lowercase_info = wgt::AdapterInfo { + let adapter_lowercase_info = wgpu::AdapterInfo { name: adapter.info.name.to_lowercase(), driver: adapter.info.driver.to_lowercase(), ..adapter.info.clone() diff --git a/tests/src/run.rs b/tests/src/run.rs index 5fb15c4c3d..f2df80143f 100644 --- a/tests/src/run.rs +++ b/tests/src/run.rs @@ -12,7 +12,7 @@ use crate::{ GpuTestConfiguration, }; -/// Parameters and resources hadned to the test function. +/// Parameters and resources handed to the test function. pub struct TestingContext { pub instance: Instance, pub adapter: Adapter, diff --git a/tests/tests/binding_array/mod.rs b/tests/tests/binding_array/mod.rs index 4b8972fcdb..72b95b1dbb 100644 --- a/tests/tests/binding_array/mod.rs +++ b/tests/tests/binding_array/mod.rs @@ -2,3 +2,4 @@ mod buffers; mod sampled_textures; mod samplers; mod storage_textures; +mod validation; diff --git a/tests/tests/binding_array/validation.rs b/tests/tests/binding_array/validation.rs new file mode 100644 index 0000000000..46aa56a7e5 --- /dev/null +++ b/tests/tests/binding_array/validation.rs @@ -0,0 +1,92 @@ +use std::num::NonZeroU32; + +use wgpu::*; +use wgpu_test::{ + fail, gpu_test, FailureCase, GpuTestConfiguration, TestParameters, TestingContext, +}; + +#[gpu_test] +static VALIDATION: GpuTestConfiguration = GpuTestConfiguration::new() + .parameters( + TestParameters::default() + .features(Features::TEXTURE_BINDING_ARRAY) + .limits(Limits { + max_dynamic_storage_buffers_per_pipeline_layout: 1, + ..Limits::downlevel_defaults() + }) + .expect_fail( + // https://github.com/gfx-rs/wgpu/issues/6950 + FailureCase::backend(Backends::VULKAN).validation_error("has not been destroyed"), + ), + ) + .run_async(validation); + +async fn validation(ctx: TestingContext) { + // Check that you can't create a bind group with both dynamic offset and binding array + fail( + &ctx.device, + || { + ctx.device + .create_bind_group_layout(&BindGroupLayoutDescriptor { + label: Some("Test1"), + entries: &[ + BindGroupLayoutEntry { + binding: 0, + visibility: ShaderStages::FRAGMENT, + ty: BindingType::Texture { + sample_type: TextureSampleType::Float { filterable: false }, + view_dimension: TextureViewDimension::D2, + multisampled: false, + }, + count: Some(NonZeroU32::new(4).unwrap()), + }, + BindGroupLayoutEntry { + binding: 1, + visibility: ShaderStages::FRAGMENT, + ty: BindingType::Buffer { + ty: BufferBindingType::Storage { read_only: true }, + has_dynamic_offset: true, + min_binding_size: None, + }, + count: None, + }, + ], + }) + }, + 
Some("binding array and a dynamically offset buffer"), + ); + + // Check that you can't create a bind group with both uniform buffer and binding array + fail( + &ctx.device, + || { + ctx.device + .create_bind_group_layout(&BindGroupLayoutDescriptor { + label: Some("Test2"), + entries: &[ + BindGroupLayoutEntry { + binding: 0, + visibility: ShaderStages::FRAGMENT, + ty: BindingType::Texture { + sample_type: TextureSampleType::Float { filterable: false }, + view_dimension: TextureViewDimension::D2, + multisampled: false, + }, + count: Some(NonZeroU32::new(4).unwrap()), + }, + BindGroupLayoutEntry { + binding: 1, + visibility: ShaderStages::FRAGMENT, + ty: BindingType::Buffer { + ty: BufferBindingType::Uniform, + has_dynamic_offset: false, + min_binding_size: None, + }, + count: None, + }, + ], + }) + }, + Some("binding array and a uniform buffer"), + ); +} diff --git a/tests/tests/buffer_copy.rs b/tests/tests/buffer_copy.rs index a7e9eff8cc..8968d9227f 100644 --- a/tests/tests/buffer_copy.rs +++ b/tests/tests/buffer_copy.rs @@ -1,6 +1,6 @@ //! Tests for buffer copy validation. -use wgt::BufferAddress; +use wgpu::BufferAddress; use wgpu_test::{fail_if, gpu_test, GpuTestConfiguration}; diff --git a/tests/tests/buffer_usages.rs b/tests/tests/buffer_usages.rs index 00d63adae8..52848ab4f7 100644 --- a/tests/tests/buffer_usages.rs +++ b/tests/tests/buffer_usages.rs @@ -1,8 +1,8 @@ //! Tests for buffer usages validation. +use wgpu::BufferAddress; use wgpu::{BufferUsages as Bu, MapMode as Ma}; use wgpu_test::{fail_if, gpu_test, GpuTestConfiguration, TestParameters, TestingContext}; -use wgt::BufferAddress; const BUFFER_SIZE: BufferAddress = 1234; diff --git a/tests/tests/device.rs b/tests/tests/device.rs index 41efc9b565..28992a1515 100644 --- a/tests/tests/device.rs +++ b/tests/tests/device.rs @@ -603,7 +603,7 @@ static DEVICE_DESTROY_THEN_LOST: GpuTestConfiguration = GpuTestConfiguration::ne let callback = Box::new(|reason, _m| { WAS_CALLED.store(true, std::sync::atomic::Ordering::SeqCst); assert!( - matches!(reason, wgt::DeviceLostReason::Destroyed), + matches!(reason, wgpu::DeviceLostReason::Destroyed), "Device lost info reason should match DeviceLostReason::Destroyed." 
); }); @@ -669,18 +669,18 @@ static DIFFERENT_BGL_ORDER_BW_SHADER_AND_API: GpuTestConfiguration = GpuTestConf ), }); - let my_texture = ctx.device.create_texture(&wgt::TextureDescriptor { + let my_texture = ctx.device.create_texture(&wgpu::TextureDescriptor { label: None, - size: wgt::Extent3d { + size: wgpu::Extent3d { width: 1024, height: 512, depth_or_array_layers: 1, }, mip_level_count: 1, sample_count: 1, - dimension: wgt::TextureDimension::D2, - format: wgt::TextureFormat::Rgba8Unorm, - usage: wgt::TextureUsages::RENDER_ATTACHMENT | wgt::TextureUsages::TEXTURE_BINDING, + dimension: wgpu::TextureDimension::D2, + format: wgpu::TextureFormat::Rgba8Unorm, + usage: wgpu::TextureUsages::RENDER_ATTACHMENT | wgpu::TextureUsages::TEXTURE_BINDING, view_formats: &[], }); @@ -689,7 +689,7 @@ static DIFFERENT_BGL_ORDER_BW_SHADER_AND_API: GpuTestConfiguration = GpuTestConf format: None, dimension: None, usage: None, - aspect: wgt::TextureAspect::All, + aspect: wgpu::TextureAspect::All, base_mip_level: 0, mip_level_count: None, base_array_layer: 0, @@ -707,10 +707,10 @@ static DIFFERENT_BGL_ORDER_BW_SHADER_AND_API: GpuTestConfiguration = GpuTestConf module: &trivial_shaders_with_some_reversed_bindings, entry_point: Some("fs_main"), compilation_options: Default::default(), - targets: &[Some(wgt::ColorTargetState { - format: wgt::TextureFormat::Bgra8Unorm, + targets: &[Some(wgpu::ColorTargetState { + format: wgpu::TextureFormat::Bgra8Unorm, blend: None, - write_mask: wgt::ColorWrites::ALL, + write_mask: wgpu::ColorWrites::ALL, })], }), layout: None, @@ -723,9 +723,9 @@ static DIFFERENT_BGL_ORDER_BW_SHADER_AND_API: GpuTestConfiguration = GpuTestConf compilation_options: Default::default(), buffers: &[], }, - primitive: wgt::PrimitiveState::default(), + primitive: wgpu::PrimitiveState::default(), depth_stencil: None, - multisample: wgt::MultisampleState::default(), + multisample: wgpu::MultisampleState::default(), multiview: None, cache: None, }); diff --git a/tests/tests/dispatch_workgroups_indirect.rs b/tests/tests/dispatch_workgroups_indirect.rs index cd83b291c8..2c2e0aa0f3 100644 --- a/tests/tests/dispatch_workgroups_indirect.rs +++ b/tests/tests/dispatch_workgroups_indirect.rs @@ -180,7 +180,7 @@ impl TestResources { label: None, entries: &[wgpu::BindGroupLayoutEntry { binding: 0, - visibility: wgt::ShaderStages::COMPUTE, + visibility: wgpu::ShaderStages::COMPUTE, ty: wgpu::BindingType::Buffer { ty: wgpu::BufferBindingType::Storage { read_only: false }, has_dynamic_offset: false, @@ -195,8 +195,8 @@ impl TestResources { .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { label: None, bind_group_layouts: &[&bgl], - push_constant_ranges: &[wgt::PushConstantRange { - stages: wgt::ShaderStages::COMPUTE, + push_constant_ranges: &[wgpu::PushConstantRange { + stages: wgpu::ShaderStages::COMPUTE, range: 0..4, }], }); diff --git a/tests/tests/external_texture.rs b/tests/tests/external_texture.rs index 60c4eb1b2f..5a35c9930a 100644 --- a/tests/tests/external_texture.rs +++ b/tests/tests/external_texture.rs @@ -145,7 +145,7 @@ static IMAGE_BITMAP_IMPORT: GpuTestConfiguration = // The layer the external image's data should end up in. let mut dest_data_layer = 0; // Color space the destination is in. - let mut dest_color_space = wgt::PredefinedColorSpace::Srgb; + let mut dest_color_space = wgpu::PredefinedColorSpace::Srgb; // If the destination image is premultiplied. 
let mut dest_premultiplied = false; // Size of the external copy @@ -164,7 +164,7 @@ static IMAGE_BITMAP_IMPORT: GpuTestConfiguration = match case { TestCase::Normal => {} TestCase::FlipY => { - valid = !matches!(source, wgt::ExternalImageSource::ImageBitmap(_)); + valid = !matches!(source, wgpu::ExternalImageSource::ImageBitmap(_)); src_flip_y = true; for x in 0..3 { let top = raw_image[(x, 0)]; @@ -174,7 +174,7 @@ static IMAGE_BITMAP_IMPORT: GpuTestConfiguration = } } TestCase::Premultiplied => { - valid = !matches!(source, wgt::ExternalImageSource::ImageBitmap(_)); + valid = !matches!(source, wgpu::ExternalImageSource::ImageBitmap(_)); dest_premultiplied = true; for pixel in raw_image.pixels_mut() { let mut float_pix = pixel.0.map(|v| v as f32 / 255.0); @@ -188,8 +188,8 @@ static IMAGE_BITMAP_IMPORT: GpuTestConfiguration = valid = ctx .adapter_downlevel_capabilities .flags - .contains(wgt::DownlevelFlags::UNRESTRICTED_EXTERNAL_TEXTURE_COPIES); - dest_color_space = wgt::PredefinedColorSpace::DisplayP3; + .contains(wgpu::DownlevelFlags::UNRESTRICTED_EXTERNAL_TEXTURE_COPIES); + dest_color_space = wgpu::PredefinedColorSpace::DisplayP3; // As we don't test, we don't bother converting the color spaces // in the image as that's relatively annoying. @@ -198,7 +198,7 @@ static IMAGE_BITMAP_IMPORT: GpuTestConfiguration = valid = ctx .adapter_downlevel_capabilities .flags - .contains(wgt::DownlevelFlags::UNRESTRICTED_EXTERNAL_TEXTURE_COPIES); + .contains(wgpu::DownlevelFlags::UNRESTRICTED_EXTERNAL_TEXTURE_COPIES); src_origin.x = 1; dest_origin.x = 1; copy_size.width = 2; @@ -274,7 +274,7 @@ static IMAGE_BITMAP_IMPORT: GpuTestConfiguration = origin: src_origin, flip_y: src_flip_y, }, - wgt::CopyExternalImageDestInfo { + wgpu::CopyExternalImageDestInfo { texture: &texture, mip_level: 0, origin: dest_origin, diff --git a/tests/tests/oob_indexing.rs b/tests/tests/oob_indexing.rs index c0c8f41f54..332105ef2f 100644 --- a/tests/tests/oob_indexing.rs +++ b/tests/tests/oob_indexing.rs @@ -1,5 +1,5 @@ +use wgpu::{Backend, Backends}; use wgpu_test::{gpu_test, FailureCase, GpuTestConfiguration, TestParameters, TestingContext}; -use wgt::{Backend, Backends}; /// Tests that writing and reading to the max length of a container (vec, mat, array) /// in the workgroup, private and function address spaces + let declarations @@ -145,7 +145,7 @@ impl TestResources { entries: &[ wgpu::BindGroupLayoutEntry { binding: 0, - visibility: wgt::ShaderStages::COMPUTE, + visibility: wgpu::ShaderStages::COMPUTE, ty: wgpu::BindingType::Buffer { ty: wgpu::BufferBindingType::Storage { read_only: false }, has_dynamic_offset: false, @@ -155,7 +155,7 @@ impl TestResources { }, wgpu::BindGroupLayoutEntry { binding: 1, - visibility: wgt::ShaderStages::COMPUTE, + visibility: wgpu::ShaderStages::COMPUTE, ty: wgpu::BindingType::Buffer { ty: wgpu::BufferBindingType::Storage { read_only: false }, has_dynamic_offset: false, @@ -230,3 +230,229 @@ impl TestResources { } } } + +/// Tests behavior of OOB accesses for dynamic buffers. +/// +/// This test is specific to D3D12 since Vulkan and Metal behave differently and +/// the WGSL spec allows for multiple behaviors when it comes to OOB accesses. 
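+///
+/// Expected behavior here, mirroring the comments in the shader below: the index into the
+/// dynamically offset uniform buffer is clamped, so the read stays inside the bound range,
+/// while the dynamically offset storage buffer relies on the D3D12 runtime's bounds check,
+/// returning the in-buffer value while the access stays within the buffer and zero once it
+/// falls past the end.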
+#[gpu_test] +static D3D12_RESTRICT_DYNAMIC_BUFFERS: GpuTestConfiguration = GpuTestConfiguration::new() + .parameters( + TestParameters::default() + .downlevel_flags(wgpu::DownlevelFlags::COMPUTE_SHADERS) + .limits(wgpu::Limits::downlevel_defaults()) + .skip(FailureCase::backend(Backends::all() - Backends::DX12)), + ) + .run_async(d3d12_restrict_dynamic_buffers); + +async fn d3d12_restrict_dynamic_buffers(ctx: TestingContext) { + let shader_src = " + @group(0) @binding(0) + var in: u32; + @group(0) @binding(1) + var out: array; + + struct T { + @size(16) + t: u32 + } + + @group(0) @binding(2) + var in_data_uniform: array; + + @group(0) @binding(3) + var in_data_storage: array; + + @compute @workgroup_size(1) + fn main() { + let i = in; + out[0] = in_data_uniform[i].t; // should be 1 since we clamp the index + + out[1] = in_data_storage[i].t; // should be 3 since we rely on the D3D12 runtime to bound check and + // the index is still in the bounds of the buffer + + out[2] = in_data_storage[i+1].t; // should be 0 since we rely on the D3D12 runtime to bound check + } + "; + + let module = ctx + .device + .create_shader_module(wgpu::ShaderModuleDescriptor { + label: None, + source: wgpu::ShaderSource::Wgsl(shader_src.into()), + }); + + let bgl = ctx + .device + .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label: None, + entries: &[ + wgpu::BindGroupLayoutEntry { + binding: 0, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Storage { read_only: false }, + has_dynamic_offset: false, + min_binding_size: None, + }, + count: None, + }, + wgpu::BindGroupLayoutEntry { + binding: 1, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Storage { read_only: false }, + has_dynamic_offset: false, + min_binding_size: None, + }, + count: None, + }, + wgpu::BindGroupLayoutEntry { + binding: 2, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Uniform, + has_dynamic_offset: true, + min_binding_size: None, + }, + count: None, + }, + wgpu::BindGroupLayoutEntry { + binding: 3, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Storage { read_only: false }, + has_dynamic_offset: true, + min_binding_size: None, + }, + count: None, + }, + ], + }); + + let layout = ctx + .device + .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { + label: None, + bind_group_layouts: &[&bgl], + push_constant_ranges: &[], + }); + + let pipeline = ctx + .device + .create_compute_pipeline(&wgpu::ComputePipelineDescriptor { + label: None, + layout: Some(&layout), + module: &module, + entry_point: Some("main"), + compilation_options: Default::default(), + cache: None, + }); + + let in_buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor { + label: None, + size: 4, + usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST, + mapped_at_creation: false, + }); + + let out_buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor { + label: None, + size: 3 * 4, + usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC, + mapped_at_creation: false, + }); + + let in_data_uniform_buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor { + label: None, + size: 256 + 8 * 4, + usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST, + mapped_at_creation: false, + }); + let in_data_storage_buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor { + label: None, 
+ size: 256 + 8 * 4, + usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST, + mapped_at_creation: false, + }); + + let readback_buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor { + label: None, + size: 3 * 4, + usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ, + mapped_at_creation: false, + }); + + let bind_group = ctx.device.create_bind_group(&wgpu::BindGroupDescriptor { + label: None, + layout: &bgl, + entries: &[ + wgpu::BindGroupEntry { + binding: 0, + resource: in_buffer.as_entire_binding(), + }, + wgpu::BindGroupEntry { + binding: 1, + resource: out_buffer.as_entire_binding(), + }, + wgpu::BindGroupEntry { + binding: 2, + resource: wgpu::BindingResource::Buffer(wgpu::BufferBinding { + buffer: &in_data_uniform_buffer, + offset: 0, + size: Some(std::num::NonZeroU64::new(4 * 4).unwrap()), + }), + }, + wgpu::BindGroupEntry { + binding: 3, + resource: wgpu::BindingResource::Buffer(wgpu::BufferBinding { + buffer: &in_data_storage_buffer, + offset: 0, + size: Some(std::num::NonZeroU64::new(4 * 4).unwrap()), + }), + }, + ], + }); + + ctx.queue + .write_buffer(&in_buffer, 0, bytemuck::bytes_of(&1_u32)); + + #[rustfmt::skip] + let in_data = [ + 1_u32, 2_u32, 2_u32, 2_u32, + 3_u32, 4_u32, 4_u32, 4_u32, + ]; + + ctx.queue + .write_buffer(&in_data_uniform_buffer, 256, bytemuck::bytes_of(&in_data)); + ctx.queue + .write_buffer(&in_data_storage_buffer, 256, bytemuck::bytes_of(&in_data)); + + let mut encoder = ctx.device.create_command_encoder(&Default::default()); + { + let mut compute_pass = encoder.begin_compute_pass(&Default::default()); + compute_pass.set_pipeline(&pipeline); + compute_pass.set_bind_group(0, &bind_group, &[256, 256]); + compute_pass.dispatch_workgroups(1, 1, 1); + } + + encoder.copy_buffer_to_buffer(&out_buffer, 0, &readback_buffer, 0, 3 * 4); + + ctx.queue.submit(Some(encoder.finish())); + + readback_buffer + .slice(..) + .map_async(wgpu::MapMode::Read, |_| {}); + + ctx.async_poll(wgpu::Maintain::wait()) + .await + .panic_on_timeout(); + + let view = readback_buffer.slice(..).get_mapped_range(); + + let current_res: [u32; 3] = *bytemuck::from_bytes(&view); + drop(view); + readback_buffer.unmap(); + + assert_eq!([1, 3, 0], current_res); +} diff --git a/tests/tests/ray_tracing/as_build.rs b/tests/tests/ray_tracing/as_build.rs index 5255694011..fe452f6812 100644 --- a/tests/tests/ray_tracing/as_build.rs +++ b/tests/tests/ray_tracing/as_build.rs @@ -1,88 +1,12 @@ -use std::{iter, mem}; +use std::iter; -use wgpu::{ - util::{BufferInitDescriptor, DeviceExt}, - *, -}; +use crate::ray_tracing::AsBuildContext; +use wgpu::util::{BufferInitDescriptor, DeviceExt}; +use wgpu::*; use wgpu_test::{ fail, gpu_test, FailureCase, GpuTestConfiguration, TestParameters, TestingContext, }; -struct AsBuildContext { - vertices: Buffer, - blas_size: BlasTriangleGeometrySizeDescriptor, - blas: Blas, - // Putting this last, forces the BLAS to die before the TLAS. 
- tlas_package: TlasPackage, -} - -impl AsBuildContext { - fn new(ctx: &TestingContext) -> Self { - let vertices = ctx.device.create_buffer_init(&BufferInitDescriptor { - label: None, - contents: &[0; mem::size_of::<[[f32; 3]; 3]>()], - usage: BufferUsages::BLAS_INPUT, - }); - - let blas_size = BlasTriangleGeometrySizeDescriptor { - vertex_format: VertexFormat::Float32x3, - vertex_count: 3, - index_format: None, - index_count: None, - flags: AccelerationStructureGeometryFlags::empty(), - }; - - let blas = ctx.device.create_blas( - &CreateBlasDescriptor { - label: Some("BLAS"), - flags: AccelerationStructureFlags::PREFER_FAST_TRACE, - update_mode: AccelerationStructureUpdateMode::Build, - }, - BlasGeometrySizeDescriptors::Triangles { - descriptors: vec![blas_size.clone()], - }, - ); - - let tlas = ctx.device.create_tlas(&CreateTlasDescriptor { - label: Some("TLAS"), - max_instances: 1, - flags: AccelerationStructureFlags::PREFER_FAST_TRACE, - update_mode: AccelerationStructureUpdateMode::Build, - }); - - let mut tlas_package = TlasPackage::new(tlas); - tlas_package[0] = Some(TlasInstance::new( - &blas, - [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0], - 0, - 0xFF, - )); - - Self { - vertices, - blas_size, - blas, - tlas_package, - } - } - - fn blas_build_entry(&self) -> BlasBuildEntry { - BlasBuildEntry { - blas: &self.blas, - geometry: BlasGeometries::TriangleGeometries(vec![BlasTriangleGeometry { - size: &self.blas_size, - vertex_buffer: &self.vertices, - first_vertex: 0, - vertex_stride: mem::size_of::<[f32; 3]>() as BufferAddress, - index_buffer: None, - first_index: None, - transform_buffer: None, - transform_buffer_offset: None, - }]), - } - } -} - #[gpu_test] static UNBUILT_BLAS: GpuTestConfiguration = GpuTestConfiguration::new() .parameters( @@ -256,7 +180,7 @@ fn out_of_order_as_build_use(ctx: TestingContext) { label: None, layout: None, module: &shader, - entry_point: Some("comp_main"), + entry_point: Some("basic_usage"), compilation_options: Default::default(), cache: None, }); @@ -343,7 +267,7 @@ static BUILD_WITH_TRANSFORM: GpuTestConfiguration = GpuTestConfiguration::new() fn build_with_transform(ctx: TestingContext) { let vertices = ctx.device.create_buffer_init(&BufferInitDescriptor { label: None, - contents: &[0; mem::size_of::<[[f32; 3]; 3]>()], + contents: &[0; size_of::<[[f32; 3]; 3]>()], usage: BufferUsages::BLAS_INPUT, }); @@ -404,7 +328,7 @@ fn build_with_transform(ctx: TestingContext) { size: &blas_size, vertex_buffer: &vertices, first_vertex: 0, - vertex_stride: mem::size_of::<[f32; 3]>() as BufferAddress, + vertex_stride: size_of::<[f32; 3]>() as BufferAddress, index_buffer: None, first_index: None, transform_buffer: Some(&transform), diff --git a/tests/tests/ray_tracing/as_create.rs b/tests/tests/ray_tracing/as_create.rs index 617852333b..64459fabff 100644 --- a/tests/tests/ray_tracing/as_create.rs +++ b/tests/tests/ray_tracing/as_create.rs @@ -3,9 +3,9 @@ use wgpu::{ AccelerationStructureUpdateMode, BlasGeometrySizeDescriptors, BlasTriangleGeometrySizeDescriptor, CreateBlasDescriptor, }; +use wgpu::{IndexFormat, VertexFormat}; use wgpu_macros::gpu_test; use wgpu_test::{fail, GpuTestConfiguration, TestParameters, TestingContext}; -use wgt::{IndexFormat, VertexFormat}; #[gpu_test] static BLAS_INVALID_VERTEX_FORMAT: GpuTestConfiguration = GpuTestConfiguration::new() diff --git a/tests/tests/ray_tracing/as_use_after_free.rs b/tests/tests/ray_tracing/as_use_after_free.rs index fcbc75b3a5..5692c30c98 100644 --- 
a/tests/tests/ray_tracing/as_use_after_free.rs +++ b/tests/tests/ray_tracing/as_use_after_free.rs @@ -108,7 +108,7 @@ fn acceleration_structure_use_after_free(ctx: TestingContext) { label: None, layout: None, module: &shader, - entry_point: Some("comp_main"), + entry_point: Some("basic_usage"), compilation_options: Default::default(), cache: None, }); diff --git a/tests/tests/ray_tracing/mod.rs b/tests/tests/ray_tracing/mod.rs index e204392d2e..a502ddccba 100644 --- a/tests/tests/ray_tracing/mod.rs +++ b/tests/tests/ray_tracing/mod.rs @@ -1,4 +1,93 @@ +use std::mem; +use wgpu::util::BufferInitDescriptor; +use wgpu::{ + util::DeviceExt, Blas, BlasBuildEntry, BlasGeometries, BlasGeometrySizeDescriptors, + BlasTriangleGeometry, BlasTriangleGeometrySizeDescriptor, Buffer, CreateBlasDescriptor, + CreateTlasDescriptor, TlasInstance, TlasPackage, +}; +use wgpu::{ + AccelerationStructureFlags, AccelerationStructureGeometryFlags, + AccelerationStructureUpdateMode, BufferAddress, BufferUsages, VertexFormat, +}; +use wgpu_test::TestingContext; + mod as_build; mod as_create; mod as_use_after_free; mod scene; +mod shader; + +pub struct AsBuildContext { + vertices: Buffer, + blas_size: BlasTriangleGeometrySizeDescriptor, + blas: Blas, + // Putting this last, forces the BLAS to die before the TLAS. + tlas_package: TlasPackage, +} + +impl AsBuildContext { + pub fn new(ctx: &TestingContext) -> Self { + let vertices = ctx.device.create_buffer_init(&BufferInitDescriptor { + label: None, + contents: &[0; mem::size_of::<[[f32; 3]; 3]>()], + usage: BufferUsages::BLAS_INPUT, + }); + + let blas_size = BlasTriangleGeometrySizeDescriptor { + vertex_format: VertexFormat::Float32x3, + vertex_count: 3, + index_format: None, + index_count: None, + flags: AccelerationStructureGeometryFlags::empty(), + }; + + let blas = ctx.device.create_blas( + &CreateBlasDescriptor { + label: Some("BLAS"), + flags: AccelerationStructureFlags::PREFER_FAST_TRACE, + update_mode: AccelerationStructureUpdateMode::Build, + }, + BlasGeometrySizeDescriptors::Triangles { + descriptors: vec![blas_size.clone()], + }, + ); + + let tlas = ctx.device.create_tlas(&CreateTlasDescriptor { + label: Some("TLAS"), + max_instances: 1, + flags: AccelerationStructureFlags::PREFER_FAST_TRACE, + update_mode: AccelerationStructureUpdateMode::Build, + }); + + let mut tlas_package = TlasPackage::new(tlas); + tlas_package[0] = Some(TlasInstance::new( + &blas, + [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0], + 0, + 0xFF, + )); + + Self { + vertices, + blas_size, + blas, + tlas_package, + } + } + + pub fn blas_build_entry(&self) -> BlasBuildEntry { + BlasBuildEntry { + blas: &self.blas, + geometry: BlasGeometries::TriangleGeometries(vec![BlasTriangleGeometry { + size: &self.blas_size, + vertex_buffer: &self.vertices, + first_vertex: 0, + vertex_stride: mem::size_of::<[f32; 3]>() as BufferAddress, + index_buffer: None, + first_index: None, + transform_buffer: None, + transform_buffer_offset: None, + }]), + } + } +} diff --git a/tests/tests/ray_tracing/shader.rs b/tests/tests/ray_tracing/shader.rs new file mode 100644 index 0000000000..08e4155235 --- /dev/null +++ b/tests/tests/ray_tracing/shader.rs @@ -0,0 +1,95 @@ +use crate::ray_tracing::AsBuildContext; +use wgpu::BufferUsages; +use wgpu::{ + include_wgsl, BindGroupDescriptor, BindGroupEntry, BindingResource, BufferDescriptor, + CommandEncoderDescriptor, ComputePassDescriptor, ComputePipelineDescriptor, +}; +use wgpu_macros::gpu_test; +use wgpu_test::{GpuTestConfiguration, TestParameters, 
TestingContext}; + +const STRUCT_SIZE: wgpu::BufferAddress = 176; + +#[gpu_test] +static ACCESS_ALL_STRUCT_MEMBERS: GpuTestConfiguration = GpuTestConfiguration::new() + .parameters(TestParameters::default().test_features_limits().features( + wgpu::Features::EXPERIMENTAL_RAY_TRACING_ACCELERATION_STRUCTURE + | wgpu::Features::EXPERIMENTAL_RAY_QUERY, + )) + .run_sync(access_all_struct_members); + +fn access_all_struct_members(ctx: TestingContext) { + let buf = ctx.device.create_buffer(&BufferDescriptor { + label: None, + size: STRUCT_SIZE, + usage: BufferUsages::STORAGE, + mapped_at_creation: false, + }); + // + // Create a clean `AsBuildContext` + // + + let as_ctx = AsBuildContext::new(&ctx); + + let mut encoder_build = ctx + .device + .create_command_encoder(&CommandEncoderDescriptor { + label: Some("Build"), + }); + + encoder_build + .build_acceleration_structures([&as_ctx.blas_build_entry()], [&as_ctx.tlas_package]); + + ctx.queue.submit([encoder_build.finish()]); + + // + // Create shader to use tlas with + // + + let shader = ctx + .device + .create_shader_module(include_wgsl!("shader.wgsl")); + let compute_pipeline = ctx + .device + .create_compute_pipeline(&ComputePipelineDescriptor { + label: None, + layout: None, + module: &shader, + entry_point: Some("all_of_struct"), + compilation_options: Default::default(), + cache: None, + }); + + let bind_group = ctx.device.create_bind_group(&BindGroupDescriptor { + label: None, + layout: &compute_pipeline.get_bind_group_layout(0), + entries: &[ + BindGroupEntry { + binding: 0, + resource: BindingResource::AccelerationStructure(as_ctx.tlas_package.tlas()), + }, + BindGroupEntry { + binding: 1, + resource: BindingResource::Buffer(buf.as_entire_buffer_binding()), + }, + ], + }); + + // + // Submit once to check for no issues + // + + let mut encoder_compute = ctx + .device + .create_command_encoder(&CommandEncoderDescriptor::default()); + { + let mut pass = encoder_compute.begin_compute_pass(&ComputePassDescriptor { + label: None, + timestamp_writes: None, + }); + pass.set_pipeline(&compute_pipeline); + pass.set_bind_group(0, Some(&bind_group), &[]); + pass.dispatch_workgroups(1, 1, 1) + } + + ctx.queue.submit([encoder_compute.finish()]); +} diff --git a/tests/tests/ray_tracing/shader.wgsl b/tests/tests/ray_tracing/shader.wgsl index 370d69e1c3..2130b8d9ae 100644 --- a/tests/tests/ray_tracing/shader.wgsl +++ b/tests/tests/ray_tracing/shader.wgsl @@ -1,11 +1,51 @@ @group(0) @binding(0) var acc_struct: acceleration_structure; +struct Intersection { + kind: u32, + t: f32, + instance_custom_data: u32, + instance_index: u32, + sbt_record_offset: u32, + geometry_index: u32, + primitive_index: u32, + barycentrics: vec2, + front_face: u32, + object_to_world: mat4x3, + world_to_object: mat4x3, +} + +@group(0) @binding(1) +var out: Intersection; + @workgroup_size(1) @compute -fn comp_main() { +fn basic_usage() { var rq: ray_query; rayQueryInitialize(&rq, acc_struct, RayDesc(0u, 0xFFu, 0.001, 100000.0, vec3f(0.0, 0.0, 0.0), vec3f(0.0, 0.0, 1.0))); rayQueryProceed(&rq); let intersection = rayQueryGetCommittedIntersection(&rq); +} + +@workgroup_size(1) +@compute +fn all_of_struct() { + var rq: ray_query; + rayQueryInitialize(&rq, acc_struct, RayDesc(0u, 0xFFu, 0.0, 0.0, vec3f(0.0, 0.0, 1.0), vec3f(0.0, 0.0, 1.0))); + rayQueryProceed(&rq); + let intersection = rayQueryGetCommittedIntersection(&rq); + // this prevents optimisation as we use the fields + out = Intersection( + intersection.kind, + intersection.t, + intersection.instance_custom_data, + 
intersection.instance_index, + intersection.sbt_record_offset, + intersection.geometry_index, + intersection.primitive_index, + intersection.barycentrics, + u32(intersection.front_face), + intersection.world_to_object, + intersection.object_to_world, + ); } \ No newline at end of file diff --git a/tests/tests/root.rs b/tests/tests/root.rs index 9e71ff60f9..d0a37ba696 100644 --- a/tests/tests/root.rs +++ b/tests/tests/root.rs @@ -44,15 +44,18 @@ mod ray_tracing; mod render_pass_ownership; mod resource_descriptor_accessor; mod resource_error; +mod samplers; mod scissor_tests; mod shader; mod shader_primitive_index; mod shader_view_format; mod subgroup_operations; +mod texture_binding; mod texture_blit; mod texture_bounds; mod texture_view_creation; mod transfer; +mod transition_resources; mod vertex_formats; mod vertex_indices; mod write_texture; diff --git a/tests/tests/samplers.rs b/tests/tests/samplers.rs new file mode 100644 index 0000000000..933cd52893 --- /dev/null +++ b/tests/tests/samplers.rs @@ -0,0 +1,543 @@ +//! D3D12 samplers are fun and we're doing a decent amount of polyfilling with them. +//! +//! Do some tests to ensure things are working correctly and nothing gets mad. + +use wgpu_test::{did_fail, gpu_test, valid, GpuTestConfiguration, TestParameters, TestingContext}; + +// A number large enough to likely cause sampler caches to run out of space +// on some devices. +const PROBABLY_PROBLEMATIC_SAMPLER_COUNT: u32 = 8 * 1024; + +#[gpu_test] +static SAMPLER_DEDUPLICATION: GpuTestConfiguration = + GpuTestConfiguration::new().run_sync(sampler_deduplication); + +// Create a large number of samplers from the same two descriptors. +// +// Sampler deduplication in the backend should ensure this doesn't cause any issues. +fn sampler_deduplication(ctx: TestingContext) { + // Create 2 different sampler descriptors + let desc1 = wgpu::SamplerDescriptor { + label: Some("sampler1"), + address_mode_u: wgpu::AddressMode::ClampToEdge, + address_mode_v: wgpu::AddressMode::ClampToEdge, + address_mode_w: wgpu::AddressMode::ClampToEdge, + mag_filter: wgpu::FilterMode::Nearest, + min_filter: wgpu::FilterMode::Nearest, + mipmap_filter: wgpu::FilterMode::Nearest, + lod_min_clamp: 0.0, + lod_max_clamp: 100.0, + compare: None, + anisotropy_clamp: 1, + border_color: None, + }; + + let desc2 = wgpu::SamplerDescriptor { + label: Some("sampler2"), + address_mode_u: wgpu::AddressMode::ClampToEdge, + address_mode_v: wgpu::AddressMode::ClampToEdge, + address_mode_w: wgpu::AddressMode::ClampToEdge, + mag_filter: wgpu::FilterMode::Linear, + min_filter: wgpu::FilterMode::Linear, + mipmap_filter: wgpu::FilterMode::Linear, + lod_min_clamp: 0.0, + lod_max_clamp: 100.0, + compare: None, + anisotropy_clamp: 1, + border_color: None, + }; + + // Now create a bunch of samplers with these descriptors + let samplers = (0..PROBABLY_PROBLEMATIC_SAMPLER_COUNT) + .map(|i| { + let desc = if i % 2 == 0 { &desc1 } else { &desc2 }; + valid(&ctx.device, || ctx.device.create_sampler(desc)) + }) + .collect::>(); + + drop(samplers); +} + +#[gpu_test] +static SAMPLER_CREATION_FAILURE: GpuTestConfiguration = + GpuTestConfiguration::new().run_sync(sampler_creation_failure); + +/// We want to test that sampler creation properly fails when we hit internal sampler +/// cache limits. As we don't actually know what the limit is, we first create as many +/// samplers as we can until we get the first failure. +/// +/// This failure being caught ensures that the error catching machinery on samplers +/// is working as expected. 
+/// +/// We then clear all samplers and poll the device, which should leave the caches +/// completely empty. +/// +/// We then try to create the same number of samplers to ensure the cache was entirely +/// cleared. +fn sampler_creation_failure(ctx: TestingContext) { + let desc = wgpu::SamplerDescriptor { + label: Some("sampler1"), + address_mode_u: wgpu::AddressMode::ClampToEdge, + address_mode_v: wgpu::AddressMode::ClampToEdge, + address_mode_w: wgpu::AddressMode::ClampToEdge, + mag_filter: wgpu::FilterMode::Nearest, + min_filter: wgpu::FilterMode::Nearest, + mipmap_filter: wgpu::FilterMode::Nearest, + lod_min_clamp: 0.0, + lod_max_clamp: 100.0, + compare: None, + anisotropy_clamp: 1, + border_color: None, + }; + + let mut sampler_storage = Vec::with_capacity(PROBABLY_PROBLEMATIC_SAMPLER_COUNT as usize); + + for i in 0..PROBABLY_PROBLEMATIC_SAMPLER_COUNT { + let (failed, sampler) = did_fail(&ctx.device, || { + ctx.device.create_sampler(&wgpu::SamplerDescriptor { + lod_min_clamp: i as f32 * 0.01, + ..desc + }) + }); + + if failed { + break; + } + + sampler_storage.push(sampler); + } + + let failed_count = sampler_storage.len(); + + sampler_storage.clear(); + ctx.device.poll(wgpu::Maintain::Wait); + + for i in 0..failed_count { + valid(&ctx.device, || { + eprintln!("Trying to create sampler {}", i); + let sampler = ctx.device.create_sampler(&wgpu::SamplerDescriptor { + lod_min_clamp: i as f32 * 0.01, + // Change the max clamp to ensure the sampler is using different cache slots from + // the previous run. + lod_max_clamp: 200.0, + ..desc + }); + sampler_storage.push(sampler); + }); + } +} + +const SINGLE_GROUP_BINDINGS: &str = r#" +@group(0) @binding(0) var texture: texture_2d; +@group(0) @binding(1) var sampler0: sampler; +@group(0) @binding(2) var sampler1: sampler; +@group(0) @binding(3) var sampler2: sampler; + +@group(1) @binding(0) var results: array; +"#; + +const MULTI_GROUP_BINDINGS: &str = r#" +@group(0) @binding(0) var texture: texture_2d; +@group(0) @binding(1) var sampler0: sampler; +@group(1) @binding(0) var sampler1: sampler; +@group(2) @binding(0) var sampler2: sampler; + +@group(3) @binding(0) var results: array; +"#; + +const SAMPLER_CODE: &str = r#" +@compute @workgroup_size(1, 1, 1) +fn cs_main() { + // When sampling a 2x2 texture at the bottom left, we can change the address mode + // on S/T to get different values. This allows us to make sure the right sampler + // is being used. + results[0] = textureSampleLevel(texture, sampler0, vec2f(0.0, 1.0), 0.0); + results[1] = textureSampleLevel(texture, sampler1, vec2f(0.0, 1.0), 0.0); + results[2] = textureSampleLevel(texture, sampler2, vec2f(0.0, 1.0), 0.0); +} +"#; + +enum GroupType { + Single, + Multi, +} + +#[gpu_test] +static SAMPLER_SINGLE_BIND_GROUP: GpuTestConfiguration = GpuTestConfiguration::new() + .parameters( + TestParameters::default() + .test_features_limits() + // In OpenGL textures cannot be used with multiple samplers. + .skip(wgpu_test::FailureCase::backend(wgpu::Backends::GL)), + ) + .run_sync(|ctx| sampler_bind_group(ctx, GroupType::Single)); + +#[gpu_test] +static SAMPLER_MULTI_BIND_GROUP: GpuTestConfiguration = GpuTestConfiguration::new() + .parameters( + TestParameters::default() + .test_features_limits() + // In OpenGL textures cannot be used with multiple samplers. 
+ .skip(wgpu_test::FailureCase::backend(wgpu::Backends::GL)), + ) + .run_sync(|ctx| sampler_bind_group(ctx, GroupType::Multi)); + +fn sampler_bind_group(ctx: TestingContext, group_type: GroupType) { + let bindings = match group_type { + GroupType::Single => SINGLE_GROUP_BINDINGS, + GroupType::Multi => MULTI_GROUP_BINDINGS, + }; + + let full_shader = format!("{}\n{}", bindings, SAMPLER_CODE); + + let module = ctx + .device + .create_shader_module(wgpu::ShaderModuleDescriptor { + source: wgpu::ShaderSource::Wgsl(full_shader.into()), + label: None, + }); + + let mut bind_group_layouts = Vec::new(); + + match group_type { + GroupType::Single => { + let bgl = ctx + .device + .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label: Some("combination_bgl"), + entries: &[ + wgpu::BindGroupLayoutEntry { + binding: 0, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::Texture { + sample_type: wgpu::TextureSampleType::Float { filterable: true }, + view_dimension: wgpu::TextureViewDimension::D2, + multisampled: false, + }, + count: None, + }, + wgpu::BindGroupLayoutEntry { + binding: 1, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering), + count: None, + }, + wgpu::BindGroupLayoutEntry { + binding: 2, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering), + count: None, + }, + wgpu::BindGroupLayoutEntry { + binding: 3, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering), + count: None, + }, + ], + }); + + bind_group_layouts.push(bgl); + } + GroupType::Multi => { + let bgl0 = ctx + .device + .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label: Some("multiple_bgl0"), + entries: &[ + wgpu::BindGroupLayoutEntry { + binding: 0, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::Texture { + sample_type: wgpu::TextureSampleType::Float { filterable: true }, + view_dimension: wgpu::TextureViewDimension::D2, + multisampled: false, + }, + count: None, + }, + wgpu::BindGroupLayoutEntry { + binding: 1, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering), + count: None, + }, + ], + }); + + let bgl1 = ctx + .device + .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label: Some("multiple_bgl1"), + entries: &[wgpu::BindGroupLayoutEntry { + binding: 0, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering), + count: None, + }], + }); + + let bgl2 = ctx + .device + .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label: Some("multiple_bgl2"), + entries: &[wgpu::BindGroupLayoutEntry { + binding: 0, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering), + count: None, + }], + }); + + bind_group_layouts.push(bgl0); + bind_group_layouts.push(bgl1); + bind_group_layouts.push(bgl2); + } + } + + let output_bgl = ctx + .device + .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label: Some("output_bgl"), + entries: &[wgpu::BindGroupLayoutEntry { + binding: 0, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Storage { read_only: false }, + has_dynamic_offset: false, + min_binding_size: None, + }, + count: None, + }], + }); + + let mut bgl_references: Vec<_> = bind_group_layouts.iter().collect(); + + 
bgl_references.push(&output_bgl); + + let pipeline_layout = ctx + .device + .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { + label: Some("pipeline_layout"), + bind_group_layouts: &bgl_references, + push_constant_ranges: &[], + }); + + let input_image = ctx.device.create_texture(&wgpu::TextureDescriptor { + label: Some("input_image"), + size: wgpu::Extent3d { + width: 2, + height: 2, + depth_or_array_layers: 1, + }, + mip_level_count: 1, + sample_count: 1, + dimension: wgpu::TextureDimension::D2, + format: wgpu::TextureFormat::Rgba8Unorm, + usage: wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::COPY_DST, + view_formats: &[], + }); + + let input_image_view = input_image.create_view(&wgpu::TextureViewDescriptor::default()); + + let image_data: [u8; 16] = [ + 255, 0, 0, 255, /* */ 0, 255, 0, 255, // + 0, 0, 255, 255, /* */ 255, 255, 255, 255, // + ]; + + ctx.queue.write_texture( + wgpu::TexelCopyTextureInfo { + texture: &input_image, + mip_level: 0, + origin: wgpu::Origin3d::ZERO, + aspect: wgpu::TextureAspect::All, + }, + &image_data, + wgpu::TexelCopyBufferLayout { + offset: 0, + bytes_per_row: Some(8), + rows_per_image: None, + }, + wgpu::Extent3d { + width: 2, + height: 2, + depth_or_array_layers: 1, + }, + ); + + let address_modes = [ + ( + wgpu::AddressMode::ClampToEdge, + wgpu::AddressMode::ClampToEdge, + ), + (wgpu::AddressMode::Repeat, wgpu::AddressMode::ClampToEdge), + (wgpu::AddressMode::ClampToEdge, wgpu::AddressMode::Repeat), + ]; + + let samplers = address_modes.map(|(address_mode_u, address_mode_v)| { + ctx.device.create_sampler(&wgpu::SamplerDescriptor { + label: None, + address_mode_u, + address_mode_v, + address_mode_w: wgpu::AddressMode::ClampToEdge, + mag_filter: wgpu::FilterMode::Linear, + min_filter: wgpu::FilterMode::Linear, + mipmap_filter: wgpu::FilterMode::Nearest, + lod_min_clamp: 0.0, + lod_max_clamp: 100.0, + compare: None, + anisotropy_clamp: 1, + border_color: None, + }) + }); + + let mut bind_groups = Vec::new(); + + match group_type { + GroupType::Single => { + let bg = ctx.device.create_bind_group(&wgpu::BindGroupDescriptor { + label: Some("combination_bg"), + layout: &bind_group_layouts[0], + entries: &[ + wgpu::BindGroupEntry { + binding: 0, + resource: wgpu::BindingResource::TextureView(&input_image_view), + }, + wgpu::BindGroupEntry { + binding: 1, + resource: wgpu::BindingResource::Sampler(&samplers[0]), + }, + wgpu::BindGroupEntry { + binding: 2, + resource: wgpu::BindingResource::Sampler(&samplers[1]), + }, + wgpu::BindGroupEntry { + binding: 3, + resource: wgpu::BindingResource::Sampler(&samplers[2]), + }, + ], + }); + + bind_groups.push(bg); + } + GroupType::Multi => { + let bg0 = ctx.device.create_bind_group(&wgpu::BindGroupDescriptor { + label: Some("multiple_bg0"), + layout: &bind_group_layouts[0], + entries: &[ + wgpu::BindGroupEntry { + binding: 0, + resource: wgpu::BindingResource::TextureView(&input_image_view), + }, + wgpu::BindGroupEntry { + binding: 1, + resource: wgpu::BindingResource::Sampler(&samplers[0]), + }, + ], + }); + + let bg1 = ctx.device.create_bind_group(&wgpu::BindGroupDescriptor { + label: Some("multiple_bg1"), + layout: &bind_group_layouts[1], + entries: &[wgpu::BindGroupEntry { + binding: 0, + resource: wgpu::BindingResource::Sampler(&samplers[1]), + }], + }); + + let bg2 = ctx.device.create_bind_group(&wgpu::BindGroupDescriptor { + label: Some("multiple_bg2"), + layout: &bind_group_layouts[2], + entries: &[wgpu::BindGroupEntry { + binding: 0, + resource: wgpu::BindingResource::Sampler(&samplers[2]), + 
}], + }); + + bind_groups.push(bg0); + bind_groups.push(bg1); + bind_groups.push(bg2); + } + } + + let output_buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor { + label: Some("output_buffer"), + size: 48, + usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC, + mapped_at_creation: false, + }); + + let transfer_buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor { + label: Some("transfer_buffer"), + size: 48, + usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ, + mapped_at_creation: false, + }); + + let output_bg = ctx.device.create_bind_group(&wgpu::BindGroupDescriptor { + label: Some("output_bg"), + layout: &output_bgl, + entries: &[wgpu::BindGroupEntry { + binding: 0, + resource: wgpu::BindingResource::Buffer(wgpu::BufferBinding { + buffer: &output_buffer, + offset: 0, + size: None, + }), + }], + }); + + let mut bg_references = bind_groups.iter().collect::>(); + + bg_references.push(&output_bg); + + let pipeline = ctx + .device + .create_compute_pipeline(&wgpu::ComputePipelineDescriptor { + label: Some("pipeline"), + layout: Some(&pipeline_layout), + module: &module, + entry_point: Some("cs_main"), + cache: None, + compilation_options: Default::default(), + }); + + let mut encoder = ctx + .device + .create_command_encoder(&wgpu::CommandEncoderDescriptor { + label: Some("encoder"), + }); + + { + let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { + label: None, + timestamp_writes: None, + }); + cpass.set_pipeline(&pipeline); + for (i, &bg) in bg_references.iter().enumerate() { + cpass.set_bind_group(i as u32, bg, &[]); + } + cpass.dispatch_workgroups(1, 1, 1); + } + + encoder.copy_buffer_to_buffer(&output_buffer, 0, &transfer_buffer, 0, 48); + + ctx.queue.submit([encoder.finish()]); + let buffer_slice = transfer_buffer.slice(..); + buffer_slice.map_async(wgpu::MapMode::Read, |_| {}); + + ctx.device.poll(wgpu::Maintain::Wait); + + let buffer_data = buffer_slice.get_mapped_range(); + + let f32_buffer: &[f32] = bytemuck::cast_slice(&buffer_data); + + let correct_values: [f32; 12] = [ + 0.0, 0.0, 1.0, 1.0, // + 0.5, 0.5, 1.0, 1.0, // + 0.5, 0.0, 0.5, 1.0, // + ]; + let iter = f32_buffer.iter().zip(correct_values.iter()); + for (&result, &value) in iter { + approx::assert_relative_eq!(result, value, max_relative = 0.02); + } +} diff --git a/tests/tests/shader/numeric_builtins.rs b/tests/tests/shader/numeric_builtins.rs index db8461a3a0..bbdd379ba0 100644 --- a/tests/tests/shader/numeric_builtins.rs +++ b/tests/tests/shader/numeric_builtins.rs @@ -84,7 +84,7 @@ fn create_int64_atomic_min_max_test() -> Vec { static INT64_ATOMIC_MIN_MAX: GpuTestConfiguration = GpuTestConfiguration::new() .parameters( TestParameters::default() - .features(wgt::Features::SHADER_INT64 | wgt::Features::SHADER_INT64_ATOMIC_MIN_MAX) + .features(wgpu::Features::SHADER_INT64 | wgpu::Features::SHADER_INT64_ATOMIC_MIN_MAX) .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS) .limits(Limits::downlevel_defaults()), ) @@ -139,7 +139,7 @@ fn create_int64_atomic_all_ops_test() -> Vec { static INT64_ATOMIC_ALL_OPS: GpuTestConfiguration = GpuTestConfiguration::new() .parameters( TestParameters::default() - .features(wgt::Features::SHADER_INT64 | wgt::Features::SHADER_INT64_ATOMIC_ALL_OPS) + .features(wgpu::Features::SHADER_INT64 | wgpu::Features::SHADER_INT64_ATOMIC_ALL_OPS) .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS) .limits(Limits::downlevel_defaults()), ) @@ -183,7 +183,7 @@ fn create_float32_atomic_test() -> Vec { static FLOAT32_ATOMIC: 
GpuTestConfiguration = GpuTestConfiguration::new() .parameters( TestParameters::default() - .features(wgt::Features::SHADER_FLOAT32_ATOMIC) + .features(wgpu::Features::SHADER_FLOAT32_ATOMIC) .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS) .limits(Limits::downlevel_defaults()), ) diff --git a/tests/tests/texture_binding/mod.rs b/tests/tests/texture_binding/mod.rs new file mode 100644 index 0000000000..f218462650 --- /dev/null +++ b/tests/tests/texture_binding/mod.rs @@ -0,0 +1,64 @@ +use wgpu::{ + include_wgsl, BindGroupDescriptor, BindGroupEntry, BindingResource, ComputePassDescriptor, + ComputePipelineDescriptor, DownlevelFlags, Extent3d, Features, TextureDescriptor, + TextureDimension, TextureFormat, TextureUsages, +}; +use wgpu_macros::gpu_test; +use wgpu_test::{GpuTestConfiguration, TestParameters, TestingContext}; + +#[gpu_test] +static TEXTURE_BINDING: GpuTestConfiguration = GpuTestConfiguration::new() + .parameters( + TestParameters::default() + .test_features_limits() + .downlevel_flags(DownlevelFlags::WEBGPU_TEXTURE_FORMAT_SUPPORT) + .features(Features::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES), + ) + .run_sync(texture_binding); + +fn texture_binding(ctx: TestingContext) { + let texture = ctx.device.create_texture(&TextureDescriptor { + label: None, + size: Extent3d { + width: 1, + height: 1, + depth_or_array_layers: 1, + }, + mip_level_count: 1, + sample_count: 1, + dimension: TextureDimension::D2, + format: TextureFormat::Rg32Float, + usage: TextureUsages::STORAGE_BINDING, + view_formats: &[], + }); + let shader = ctx + .device + .create_shader_module(include_wgsl!("shader.wgsl")); + let pipeline = ctx + .device + .create_compute_pipeline(&ComputePipelineDescriptor { + label: None, + layout: None, + module: &shader, + entry_point: None, + compilation_options: Default::default(), + cache: None, + }); + let bind = ctx.device.create_bind_group(&BindGroupDescriptor { + label: None, + layout: &pipeline.get_bind_group_layout(0), + entries: &[BindGroupEntry { + binding: 0, + resource: BindingResource::TextureView(&texture.create_view(&Default::default())), + }], + }); + + let mut encoder = ctx.device.create_command_encoder(&Default::default()); + { + let mut pass = encoder.begin_compute_pass(&ComputePassDescriptor::default()); + pass.set_pipeline(&pipeline); + pass.set_bind_group(0, &bind, &[]); + pass.dispatch_workgroups(1, 1, 1); + } + ctx.queue.submit([encoder.finish()]); +} diff --git a/tests/tests/texture_binding/shader.wgsl b/tests/tests/texture_binding/shader.wgsl new file mode 100644 index 0000000000..74239a7237 --- /dev/null +++ b/tests/tests/texture_binding/shader.wgsl @@ -0,0 +1,6 @@ +@group(0) @binding(0) +var tex: texture_storage_2d; + +@compute @workgroup_size(1) fn csStore() { + _ = textureLoad(tex, vec2u(0)); +} \ No newline at end of file diff --git a/tests/tests/transition_resources.rs b/tests/tests/transition_resources.rs new file mode 100644 index 0000000000..5ae8f9a272 --- /dev/null +++ b/tests/tests/transition_resources.rs @@ -0,0 +1,35 @@ +use wgpu_test::{gpu_test, GpuTestConfiguration}; + +#[gpu_test] +static TRANSITION_RESOURCES: GpuTestConfiguration = GpuTestConfiguration::new().run_sync(|ctx| { + let texture = ctx.device.create_texture(&wgpu::TextureDescriptor { + label: None, + size: wgpu::Extent3d { + width: 32, + height: 32, + depth_or_array_layers: 1, + }, + mip_level_count: 1, + sample_count: 1, + dimension: wgpu::TextureDimension::D2, + format: wgpu::TextureFormat::Rgba8Unorm, + usage: wgpu::TextureUsages::RENDER_ATTACHMENT | 
wgpu::TextureUsages::TEXTURE_BINDING, + view_formats: &[], + }); + + let mut encoder = ctx + .device + .create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None }); + + encoder.transition_resources( + std::iter::empty(), + [wgpu::TextureTransition { + texture: &texture, + selector: None, + state: wgpu::TextureUses::COLOR_TARGET, + }] + .into_iter(), + ); + + ctx.queue.submit([encoder.finish()]); +}); diff --git a/tests/tests/vertex_formats/mod.rs b/tests/tests/vertex_formats/mod.rs index 2f2b16a9c7..f1df231fff 100644 --- a/tests/tests/vertex_formats/mod.rs +++ b/tests/tests/vertex_formats/mod.rs @@ -21,7 +21,7 @@ enum TestCase { struct Test<'a> { case: TestCase, entry_point: &'a str, - attributes: &'a [wgt::VertexAttribute], + attributes: &'a [wgpu::VertexAttribute], input: &'a [u8], checksums: &'a [f32], } diff --git a/tests/tests/vertex_indices/mod.rs b/tests/tests/vertex_indices/mod.rs index 7dd4779964..f246b0e350 100644 --- a/tests/tests/vertex_indices/mod.rs +++ b/tests/tests/vertex_indices/mod.rs @@ -8,8 +8,8 @@ use std::{num::NonZeroU64, ops::Range}; use itertools::Itertools; use strum::IntoEnumIterator; use wgpu::util::{BufferInitDescriptor, DeviceExt, RenderEncoder}; +use wgpu::RenderBundleDescriptor; use wgpu_test::{gpu_test, GpuTestConfiguration, TestParameters, TestingContext}; -use wgt::RenderBundleDescriptor; /// Generic struct representing a draw call struct Draw { diff --git a/wgpu-core/Cargo.toml b/wgpu-core/Cargo.toml index 4e2ed64d08..41481e3cda 100644 --- a/wgpu-core/Cargo.toml +++ b/wgpu-core/Cargo.toml @@ -38,7 +38,7 @@ unexpected_cfgs = { level = "warn", check-cfg = ['cfg(wgpu_validate_locks)'] } ## Internally count resources and events for debugging purposes. If the counters ## feature is disabled, the counting infrastructure is removed from the build and ## the exposed counters always return 0. -counters = ["wgt/counters"] +counters = ["wgpu-types/counters"] ## Log all API entry points at info instead of trace level. ## Also, promotes certain debug log calls to info. @@ -49,18 +49,18 @@ resource_log_info = [] ## Support the Renderdoc graphics debugger: ## -renderdoc = ["hal/renderdoc"] +renderdoc = ["wgpu-hal/renderdoc"] ## Apply run-time checks, even in release builds. These are in addition ## to the validation carried out at public APIs in all builds. -strict_asserts = ["wgt/strict_asserts"] +strict_asserts = ["wgpu-types/strict_asserts"] ## Validates indirect draw/dispatch calls. This will also enable naga's ## WGSL frontend since we use a WGSL compute shader to do the validation. indirect-validation = ["naga/wgsl-in"] ## Enables serialization via `serde` on common wgpu types. -serde = ["dep:serde", "wgt/serde", "arrayvec/serde"] +serde = ["dep:serde", "wgpu-types/serde", "arrayvec/serde", "hashbrown/serde"] ## Enable API tracing. trace = ["dep:ron", "serde", "naga/serialize"] @@ -92,36 +92,41 @@ spirv = ["naga/spv-in", "dep:bytemuck"] ## but on a wasm binary compiled without atomics we know we are definitely ## not in a multithreaded environment. fragile-send-sync-non-atomic-wasm = [ - "hal/fragile-send-sync-non-atomic-wasm", - "wgt/fragile-send-sync-non-atomic-wasm", + "wgpu-hal/fragile-send-sync-non-atomic-wasm", + "wgpu-types/fragile-send-sync-non-atomic-wasm", ] #! ### Backends, passed through to wgpu-hal # -------------------------------------------------------------------- ## Enable the `metal` backend. -metal = ["hal/metal"] +metal = ["wgpu-hal/metal"] ## Enable the `vulkan` backend. 
-vulkan = ["hal/vulkan"] +vulkan = ["wgpu-hal/vulkan"] ## Enable the `GLES` backend. ## ## This is used for all of GLES, OpenGL, and WebGL. -gles = ["hal/gles"] +gles = ["wgpu-hal/gles"] ## Enable the `dx12` backend. -dx12 = ["hal/dx12"] +dx12 = ["wgpu-hal/dx12"] [dependencies] +naga.workspace = true +wgpu-hal.workspace = true +wgpu-types.workspace = true + arrayvec.workspace = true bit-vec.workspace = true bitflags.workspace = true bytemuck = { workspace = true, optional = true } document-features.workspace = true +hashbrown.workspace = true indexmap.workspace = true log.workspace = true -once_cell.workspace = true +once_cell = { workspace = true, features = ["std"] } parking_lot.workspace = true profiling = { workspace = true, default-features = false } raw-window-handle = { workspace = true, optional = true } @@ -131,20 +136,5 @@ serde = { workspace = true, features = ["default", "derive"], optional = true } smallvec.workspace = true thiserror.workspace = true -[dependencies.naga] -path = "../naga" -version = "24.0.0" - -[dependencies.wgt] -package = "wgpu-types" -path = "../wgpu-types" -version = "24.0.0" - -[dependencies.hal] -package = "wgpu-hal" -path = "../wgpu-hal" -version = "24.0.0" -default-features = false - [build-dependencies] cfg_aliases.workspace = true diff --git a/wgpu-core/src/binding_model.rs b/wgpu-core/src/binding_model.rs index 206eb22ed4..436ea8852a 100644 --- a/wgpu-core/src/binding_model.rs +++ b/wgpu-core/src/binding_model.rs @@ -68,6 +68,10 @@ pub enum CreateBindGroupLayoutError { }, #[error(transparent)] TooManyBindings(BindingTypeMaxCountError), + #[error("Bind groups may not contain both a binding array and a dynamically offset buffer")] + ContainsBothBindingArrayAndDynamicOffsetArray, + #[error("Bind groups may not contain both a binding array and a uniform buffer")] + ContainsBothBindingArrayAndUniformBuffer, #[error("Binding index {binding} is greater than the maximum number {maximum}")] InvalidBindingIndex { binding: u32, maximum: u32 }, #[error("Invalid visibility {0:?}")] @@ -319,6 +323,7 @@ pub(crate) struct BindingTypeMaxCountValidator { storage_textures: PerStageBindingTypeCounter, uniform_buffers: PerStageBindingTypeCounter, acceleration_structures: PerStageBindingTypeCounter, + has_bindless_array: bool, } impl BindingTypeMaxCountValidator { @@ -358,6 +363,9 @@ impl BindingTypeMaxCountValidator { self.acceleration_structures.add(binding.visibility, count); } } + if binding.count.is_some() { + self.has_bindless_array = true; + } } pub(crate) fn merge(&mut self, other: &Self) { @@ -409,6 +417,23 @@ impl BindingTypeMaxCountValidator { )?; Ok(()) } + + /// Validate that the bind group layout does not contain both a binding array and a dynamic offset array. + /// + /// This allows us to use `UPDATE_AFTER_BIND` on vulkan for bindless arrays. Vulkan does not allow + /// `UPDATE_AFTER_BIND` on dynamic offset arrays. See + pub(crate) fn validate_binding_arrays(&self) -> Result<(), CreateBindGroupLayoutError> { + let has_dynamic_offset_array = + self.dynamic_uniform_buffers > 0 || self.dynamic_storage_buffers > 0; + let has_uniform_buffer = self.uniform_buffers.max().1 > 0; + if self.has_bindless_array && has_dynamic_offset_array { + return Err(CreateBindGroupLayoutError::ContainsBothBindingArrayAndDynamicOffsetArray); + } + if self.has_bindless_array && has_uniform_buffer { + return Err(CreateBindGroupLayoutError::ContainsBothBindingArrayAndUniformBuffer); + } + Ok(()) + } } /// Bindable resource and the slot to bind it to. 
diff --git a/wgpu-core/src/command/bundle.rs b/wgpu-core/src/command/bundle.rs index 6aa614ac5f..23e4532c39 100644 --- a/wgpu-core/src/command/bundle.rs +++ b/wgpu-core/src/command/bundle.rs @@ -683,7 +683,7 @@ fn set_index_buffer( state .trackers .buffers - .merge_single(&buffer, hal::BufferUses::INDEX)?; + .merge_single(&buffer, wgt::BufferUses::INDEX)?; buffer.same_device(&state.device)?; buffer.check_usage(wgt::BufferUsages::INDEX)?; @@ -725,7 +725,7 @@ fn set_vertex_buffer( state .trackers .buffers - .merge_single(&buffer, hal::BufferUses::VERTEX)?; + .merge_single(&buffer, wgt::BufferUses::VERTEX)?; buffer.same_device(&state.device)?; buffer.check_usage(wgt::BufferUsages::VERTEX)?; @@ -864,7 +864,7 @@ fn multi_draw_indirect( state .trackers .buffers - .merge_single(&buffer, hal::BufferUses::INDIRECT)?; + .merge_single(&buffer, wgt::BufferUses::INDIRECT)?; buffer.same_device(&state.device)?; buffer.check_usage(wgt::BufferUsages::INDIRECT)?; diff --git a/wgpu-core/src/command/clear.rs b/wgpu-core/src/command/clear.rs index 0811c2ac42..6efb7eeb54 100644 --- a/wgpu-core/src/command/clear.rs +++ b/wgpu-core/src/command/clear.rs @@ -15,11 +15,14 @@ use crate::{ ParentDevice, ResourceErrorIdent, Texture, TextureClearMode, }, snatch::SnatchGuard, - track::{TextureSelector, TextureTrackerSetSingle}, + track::TextureTrackerSetSingle, }; use thiserror::Error; -use wgt::{math::align_to, BufferAddress, BufferUsages, ImageSubresourceRange, TextureAspect}; +use wgt::{ + math::align_to, BufferAddress, BufferUsages, ImageSubresourceRange, TextureAspect, + TextureSelector, +}; /// Error encountered while attempting a clear. #[derive(Clone, Debug, Error)] @@ -107,7 +110,7 @@ impl Global { let dst_pending = cmd_buf_data .trackers .buffers - .set_single(&dst_buffer, hal::BufferUses::COPY_DST); + .set_single(&dst_buffer, wgt::BufferUses::COPY_DST); let snatch_guard = dst_buffer.device.snatchable_lock.read(); let dst_raw = dst_buffer.try_raw(&snatch_guard)?; @@ -269,12 +272,12 @@ pub(crate) fn clear_texture( // Issue the right barrier. let clear_usage = match dst_texture.clear_mode { - TextureClearMode::BufferCopy => hal::TextureUses::COPY_DST, + TextureClearMode::BufferCopy => wgt::TextureUses::COPY_DST, TextureClearMode::RenderPass { is_color: false, .. - } => hal::TextureUses::DEPTH_STENCIL_WRITE, + } => wgt::TextureUses::DEPTH_STENCIL_WRITE, TextureClearMode::Surface { .. } | TextureClearMode::RenderPass { is_color: true, .. 
} => { - hal::TextureUses::COLOR_TARGET + wgt::TextureUses::COLOR_TARGET } TextureClearMode::None => { return Err(ClearError::NoValidTextureClearMode( @@ -455,7 +458,7 @@ fn clear_texture_via_render_passes( mip_level, depth_or_layer, ), - usage: hal::TextureUses::COLOR_TARGET, + usage: wgt::TextureUses::COLOR_TARGET, }, resolve_target: None, ops: hal::AttachmentOps::STORE, @@ -473,7 +476,7 @@ fn clear_texture_via_render_passes( mip_level, depth_or_layer, ), - usage: hal::TextureUses::DEPTH_STENCIL_WRITE, + usage: wgt::TextureUses::DEPTH_STENCIL_WRITE, }, depth_ops: hal::AttachmentOps::STORE, stencil_ops: hal::AttachmentOps::STORE, diff --git a/wgpu-core/src/command/compute.rs b/wgpu-core/src/command/compute.rs index 0fa6845d28..921f2d6735 100644 --- a/wgpu-core/src/command/compute.rs +++ b/wgpu-core/src/command/compute.rs @@ -60,7 +60,7 @@ impl ComputePass { } = desc; Self { - base: Some(BasePass::new(label)), + base: Some(BasePass::new(&label)), parent, timestamp_writes, @@ -95,17 +95,13 @@ impl fmt::Debug for ComputePass { } #[derive(Clone, Debug, Default)] -pub struct ComputePassDescriptor<'a> { +pub struct ComputePassDescriptor<'a, PTW = PassTimestampWrites> { pub label: Label<'a>, /// Defines where and when timestamp values will be written for this pass. - pub timestamp_writes: Option<&'a PassTimestampWrites>, + pub timestamp_writes: Option, } -struct ArcComputePassDescriptor<'a> { - pub label: &'a Label<'a>, - /// Defines where and when timestamp values will be written for this pass. - pub timestamp_writes: Option, -} +type ArcComputePassDescriptor<'a> = ComputePassDescriptor<'a, ArcPassTimestampWrites>; #[derive(Clone, Debug, Error)] #[non_exhaustive] @@ -292,7 +288,7 @@ impl Global { let hub = &self.hub; let mut arc_desc = ArcComputePassDescriptor { - label: &desc.label, + label: desc.label.as_deref().map(std::borrow::Cow::Borrowed), timestamp_writes: None, // Handle only once we resolved the encoder. 
}; @@ -307,6 +303,7 @@ impl Global { arc_desc.timestamp_writes = match desc .timestamp_writes + .as_ref() .map(|tw| { Self::validate_pass_timestamp_writes(&cmd_buf.device, &hub.query_sets.read(), tw) }) @@ -366,7 +363,7 @@ impl Global { encoder_id, &ComputePassDescriptor { label: label.as_deref().map(std::borrow::Cow::Borrowed), - timestamp_writes, + timestamp_writes: timestamp_writes.cloned(), }, ); if let Some(err) = encoder_error { @@ -938,7 +935,7 @@ fn dispatch_indirect( let src_transition = state .intermediate_trackers .buffers - .set_single(&buffer, hal::BufferUses::STORAGE_READ_ONLY); + .set_single(&buffer, wgt::BufferUses::STORAGE_READ_ONLY); let src_barrier = src_transition.map(|transition| transition.into_hal(&buffer, &state.snatch_guard)); unsafe { @@ -949,8 +946,8 @@ fn dispatch_indirect( state.raw_encoder.transition_buffers(&[hal::BufferBarrier { buffer: params.dst_buffer, usage: hal::StateTransition { - from: hal::BufferUses::INDIRECT, - to: hal::BufferUses::STORAGE_READ_WRITE, + from: wgt::BufferUses::INDIRECT, + to: wgt::BufferUses::STORAGE_READ_WRITE, }, }]); } @@ -996,8 +993,8 @@ fn dispatch_indirect( state.raw_encoder.transition_buffers(&[hal::BufferBarrier { buffer: params.dst_buffer, usage: hal::StateTransition { - from: hal::BufferUses::STORAGE_READ_WRITE, - to: hal::BufferUses::INDIRECT, + from: wgt::BufferUses::STORAGE_READ_WRITE, + to: wgt::BufferUses::INDIRECT, }, }]); } @@ -1012,7 +1009,7 @@ fn dispatch_indirect( state .scope .buffers - .merge_single(&buffer, hal::BufferUses::INDIRECT)?; + .merge_single(&buffer, wgt::BufferUses::INDIRECT)?; use crate::resource::Trackable; state.flush_states(Some(buffer.tracker_index()))?; diff --git a/wgpu-core/src/command/compute_command.rs b/wgpu-core/src/command/compute_command.rs index 67c23d9452..e8c9ad9974 100644 --- a/wgpu-core/src/command/compute_command.rs +++ b/wgpu-core/src/command/compute_command.rs @@ -228,7 +228,7 @@ pub enum ArcComputeCommand { }, PushDebugGroup { - #[cfg_attr(target_os = "emscripten", allow(dead_code))] + #[cfg_attr(not(any(feature = "serde", feature = "replay")), allow(dead_code))] color: u32, len: usize, }, @@ -236,7 +236,7 @@ pub enum ArcComputeCommand { PopDebugGroup, InsertDebugMarker { - #[cfg_attr(target_os = "emscripten", allow(dead_code))] + #[cfg_attr(not(any(feature = "serde", feature = "replay")), allow(dead_code))] color: u32, len: usize, }, diff --git a/wgpu-core/src/command/memory_init.rs b/wgpu-core/src/command/memory_init.rs index 50a2772a95..7f3bb10645 100644 --- a/wgpu-core/src/command/memory_init.rs +++ b/wgpu-core/src/command/memory_init.rs @@ -1,4 +1,6 @@ -use std::{collections::hash_map::Entry, ops::Range, sync::Arc, vec::Drain}; +use std::{ops::Range, sync::Arc, vec::Drain}; + +use hashbrown::hash_map::Entry; use crate::{ device::Device, @@ -211,7 +213,7 @@ impl BakedCommands { // must already know about it. 
let transition = device_tracker .buffers - .set_single(&buffer, hal::BufferUses::COPY_DST); + .set_single(&buffer, wgt::BufferUses::COPY_DST); let raw_buf = buffer.try_raw(snatch_guard)?; diff --git a/wgpu-core/src/command/mod.rs b/wgpu-core/src/command/mod.rs index f4ff30a392..a699545c0e 100644 --- a/wgpu-core/src/command/mod.rs +++ b/wgpu-core/src/command/mod.rs @@ -12,6 +12,7 @@ mod render; mod render_command; mod timestamp_writes; mod transfer; +mod transition_resources; use std::mem::{self, ManuallyDrop}; use std::sync::Arc; diff --git a/wgpu-core/src/command/query.rs b/wgpu-core/src/command/query.rs index c2444aa129..6ec7069bdb 100644 --- a/wgpu-core/src/command/query.rs +++ b/wgpu-core/src/command/query.rs @@ -145,17 +145,17 @@ pub enum ResolveError { #[error("Resolving queries {start_query}..{end_query} would overrun the query set of size {query_set_size}")] QueryOverrun { start_query: u32, - end_query: u32, + end_query: u64, query_set_size: u32, }, - #[error("Resolving queries {start_query}..{end_query} ({stride} byte queries) will end up overrunning the bounds of the destination buffer of size {buffer_size} using offsets {buffer_start_offset}..{buffer_end_offset}")] + #[error("Resolving queries {start_query}..{end_query} ({stride} byte queries) will end up overrunning the bounds of the destination buffer of size {buffer_size} using offsets {buffer_start_offset}..( + {bytes_used})")] BufferOverrun { start_query: u32, end_query: u32, stride: u32, buffer_size: BufferAddress, buffer_start_offset: BufferAddress, - buffer_end_offset: BufferAddress, + bytes_used: BufferAddress, }, } @@ -396,7 +396,7 @@ impl Global { let dst_pending = cmd_buf_data .trackers .buffers - .set_single(&dst_buffer, hal::BufferUses::COPY_DST); + .set_single(&dst_buffer, wgt::BufferUses::COPY_DST); let dst_barrier = dst_pending.map(|pending| pending.into_hal(&dst_buffer, &snatch_guard)); @@ -404,8 +404,10 @@ impl Global { .check_usage(wgt::BufferUsages::QUERY_RESOLVE) .map_err(ResolveError::MissingBufferUsage)?; - let end_query = start_query + query_count; - if end_query > query_set.desc.count { + let end_query = u64::from(start_query) + .checked_add(u64::from(query_count)) + .expect("`u64` overflow from adding two `u32`s, should be unreachable"); + if end_query > u64::from(query_set.desc.count) { return Err(ResolveError::QueryOverrun { start_query, end_query, @@ -413,6 +415,8 @@ impl Global { } .into()); } + let end_query = u32::try_from(end_query) + .expect("`u32` overflow for `end_query`, which should be `u32`"); let elements_per_query = match query_set.desc.ty { wgt::QueryType::Occlusion => 1, @@ -420,22 +424,22 @@ impl Global { wgt::QueryType::Timestamp => 1, }; let stride = elements_per_query * wgt::QUERY_SIZE; - let bytes_used = (stride * query_count) as BufferAddress; + let bytes_used: BufferAddress = u64::from(stride) + .checked_mul(u64::from(query_count)) + .expect("`stride` * `query_count` overflowed `u32`, should be unreachable"); let buffer_start_offset = destination_offset; - let buffer_end_offset = buffer_start_offset + bytes_used; - - if buffer_end_offset > dst_buffer.size { - return Err(ResolveError::BufferOverrun { + let buffer_end_offset = buffer_start_offset + .checked_add(bytes_used) + .filter(|buffer_end_offset| *buffer_end_offset <= dst_buffer.size) + .ok_or(ResolveError::BufferOverrun { start_query, end_query, stride, buffer_size: dst_buffer.size, buffer_start_offset, - buffer_end_offset, - } - .into()); - } + bytes_used, + })?; // TODO(https://github.com/gfx-rs/wgpu/issues/3993): Need to 
track initialization state. cmd_buf_data.buffer_memory_init_actions.extend( diff --git a/wgpu-core/src/command/ray_tracing.rs b/wgpu-core/src/command/ray_tracing.rs index 22970d542b..498b37d07b 100644 --- a/wgpu-core/src/command/ray_tracing.rs +++ b/wgpu-core/src/command/ray_tracing.rs @@ -17,10 +17,9 @@ use crate::{ FastHashSet, }; -use wgt::{math::align_to, BufferUsages, Features}; +use wgt::{math::align_to, BufferUsages, BufferUses, Features}; use super::CommandBufferMutable; -use hal::BufferUses; use std::{ cmp::max, num::NonZeroU64, @@ -397,7 +396,7 @@ impl Global { instance.map(|instance| TraceTlasInstance { blas_id: instance.blas_id, transform: *instance.transform, - custom_index: instance.custom_index, + custom_data: instance.custom_data, mask: instance.mask, }) }) @@ -445,7 +444,7 @@ impl Global { instance.as_ref().map(|instance| TlasInstance { blas_id: instance.blas_id, transform: &instance.transform, - custom_index: instance.custom_index, + custom_data: instance.custom_data, mask: instance.mask, }) }); @@ -513,7 +512,7 @@ impl Global { let mut instance_count = 0; for instance in package.instances.flatten() { - if instance.custom_index >= (1u32 << 24u32) { + if instance.custom_data >= (1u32 << 24u32) { return Err(BuildAccelerationStructureError::TlasInvalidCustomIndex( tlas.error_ident(), )); @@ -525,7 +524,7 @@ impl Global { instance_buffer_staging_source.extend(device.raw().tlas_instance_to_bytes( hal::TlasInstance { transform: *instance.transform, - custom_index: instance.custom_index, + custom_data: instance.custom_data, mask: instance.mask, blas_address: blas.handle, }, diff --git a/wgpu-core/src/command/render.rs b/wgpu-core/src/command/render.rs index abbbcfb46a..7885575465 100644 --- a/wgpu-core/src/command/render.rs +++ b/wgpu-core/src/command/render.rs @@ -29,7 +29,7 @@ use crate::{ DestroyedResourceError, Labeled, MissingBufferUsageError, MissingTextureUsageError, ParentDevice, QuerySet, Texture, TextureView, TextureViewNotRenderableReason, }, - track::{ResourceUsageCompatibilityError, TextureSelector, Tracker, UsageScope}, + track::{ResourceUsageCompatibilityError, Tracker, UsageScope}, Label, }; @@ -37,7 +37,7 @@ use arrayvec::ArrayVec; use thiserror::Error; use wgt::{ BufferAddress, BufferSize, BufferUsages, Color, DynamicOffset, IndexFormat, ShaderStages, - TextureUsages, TextureViewDimension, VertexStepMode, + TextureSelector, TextureUsages, TextureViewDimension, VertexStepMode, }; #[cfg(feature = "serde")] @@ -158,11 +158,11 @@ impl ResolvedPassChannel { #[repr(C)] #[derive(Clone, Debug, PartialEq)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -pub struct RenderPassColorAttachment { +pub struct RenderPassColorAttachment { /// The view to use as an attachment. - pub view: id::TextureViewId, + pub view: TV, /// The view that will receive the resolved output if multisampling is used. - pub resolve_target: Option, + pub resolve_target: Option, /// Operation to perform to the output attachment at the start of a /// renderpass. /// @@ -173,22 +173,8 @@ pub struct RenderPassColorAttachment { pub store_op: StoreOp, } -/// Describes a color attachment to a render pass. -#[derive(Debug)] -struct ArcRenderPassColorAttachment { - /// The view to use as an attachment. - pub view: Arc, - /// The view that will receive the resolved output if multisampling is used. - pub resolve_target: Option>, - /// Operation to perform to the output attachment at the start of a - /// renderpass. 
- /// - /// This must be clear if it is the first renderpass rendering to a swap - /// chain image. - pub load_op: LoadOp, - /// Operation to perform to the output attachment at the end of a renderpass. - pub store_op: StoreOp, -} +pub type ArcRenderPassColorAttachment = RenderPassColorAttachment>; + impl ArcRenderPassColorAttachment { fn hal_ops(&self) -> hal::AttachmentOps { load_hal_ops(self.load_op) | store_hal_ops(self.store_op) @@ -779,11 +765,11 @@ where struct RenderAttachment { texture: Arc, selector: TextureSelector, - usage: hal::TextureUses, + usage: wgt::TextureUses, } impl TextureView { - fn to_render_attachment(&self, usage: hal::TextureUses) -> RenderAttachment { + fn to_render_attachment(&self, usage: wgt::TextureUses) -> RenderAttachment { RenderAttachment { texture: self.parent.clone(), selector: self.selector.clone(), @@ -1049,9 +1035,9 @@ impl<'d> RenderPassInfo<'d> { .flags .contains(wgt::DownlevelFlags::READ_ONLY_DEPTH_STENCIL) { - hal::TextureUses::DEPTH_STENCIL_READ | hal::TextureUses::RESOURCE + wgt::TextureUses::DEPTH_STENCIL_READ | wgt::TextureUses::RESOURCE } else { - hal::TextureUses::DEPTH_STENCIL_WRITE + wgt::TextureUses::DEPTH_STENCIL_WRITE }; render_attachments.push(view.to_render_attachment(usage)); @@ -1104,7 +1090,7 @@ impl<'d> RenderPassInfo<'d> { &mut pending_discard_init_fixups, ); render_attachments - .push(color_view.to_render_attachment(hal::TextureUses::COLOR_TARGET)); + .push(color_view.to_render_attachment(wgt::TextureUses::COLOR_TARGET)); let mut hal_resolve_target = None; if let Some(resolve_view) = &at.resolve_target { @@ -1160,18 +1146,18 @@ impl<'d> RenderPassInfo<'d> { TextureInitRange::from(resolve_view.selector.clone()), ); render_attachments - .push(resolve_view.to_render_attachment(hal::TextureUses::COLOR_TARGET)); + .push(resolve_view.to_render_attachment(wgt::TextureUses::COLOR_TARGET)); hal_resolve_target = Some(hal::Attachment { view: resolve_view.try_raw(snatch_guard)?, - usage: hal::TextureUses::COLOR_TARGET, + usage: wgt::TextureUses::COLOR_TARGET, }); } color_attachments_hal.push(Some(hal::ColorAttachment { target: hal::Attachment { view: color_view.try_raw(snatch_guard)?, - usage: hal::TextureUses::COLOR_TARGET, + usage: wgt::TextureUses::COLOR_TARGET, }, resolve_target: hal_resolve_target, ops: at.hal_ops(), @@ -1333,7 +1319,7 @@ impl<'d> RenderPassInfo<'d> { depth_stencil_attachment: Some(hal::DepthStencilAttachment { target: hal::Attachment { view: view.try_raw(snatch_guard)?, - usage: hal::TextureUses::DEPTH_STENCIL_WRITE, + usage: wgt::TextureUses::DEPTH_STENCIL_WRITE, }, depth_ops, stencil_ops, @@ -2167,7 +2153,7 @@ fn set_index_buffer( .info .usage_scope .buffers - .merge_single(&buffer, hal::BufferUses::INDEX)?; + .merge_single(&buffer, wgt::BufferUses::INDEX)?; buffer.same_device_as(cmd_buf.as_ref())?; @@ -2216,7 +2202,7 @@ fn set_vertex_buffer( .info .usage_scope .buffers - .merge_single(&buffer, hal::BufferUses::VERTEX)?; + .merge_single(&buffer, wgt::BufferUses::VERTEX)?; buffer.same_device_as(cmd_buf.as_ref())?; @@ -2496,7 +2482,7 @@ fn multi_draw_indirect( .info .usage_scope .buffers - .merge_single(&indirect_buffer, hal::BufferUses::INDIRECT)?; + .merge_single(&indirect_buffer, wgt::BufferUses::INDIRECT)?; indirect_buffer.check_usage(BufferUsages::INDIRECT)?; let indirect_raw = indirect_buffer.try_raw(state.snatch_guard)?; @@ -2573,7 +2559,7 @@ fn multi_draw_indirect_count( .info .usage_scope .buffers - .merge_single(&indirect_buffer, hal::BufferUses::INDIRECT)?; + .merge_single(&indirect_buffer, 
wgt::BufferUses::INDIRECT)?; indirect_buffer.check_usage(BufferUsages::INDIRECT)?; let indirect_raw = indirect_buffer.try_raw(state.snatch_guard)?; @@ -2582,7 +2568,7 @@ fn multi_draw_indirect_count( .info .usage_scope .buffers - .merge_single(&count_buffer, hal::BufferUses::INDIRECT)?; + .merge_single(&count_buffer, wgt::BufferUses::INDIRECT)?; count_buffer.check_usage(BufferUsages::INDIRECT)?; let count_raw = count_buffer.try_raw(state.snatch_guard)?; diff --git a/wgpu-core/src/command/render_command.rs b/wgpu-core/src/command/render_command.rs index 549d140bb5..8585f645ec 100644 --- a/wgpu-core/src/command/render_command.rs +++ b/wgpu-core/src/command/render_command.rs @@ -473,13 +473,13 @@ pub enum ArcRenderCommand { indexed: bool, }, PushDebugGroup { - #[cfg_attr(target_os = "emscripten", allow(dead_code))] + #[cfg_attr(not(any(feature = "serde", feature = "replay")), allow(dead_code))] color: u32, len: usize, }, PopDebugGroup, InsertDebugMarker { - #[cfg_attr(target_os = "emscripten", allow(dead_code))] + #[cfg_attr(not(any(feature = "serde", feature = "replay")), allow(dead_code))] color: u32, len: usize, }, diff --git a/wgpu-core/src/command/timestamp_writes.rs b/wgpu-core/src/command/timestamp_writes.rs index e91b48534d..042aec46e2 100644 --- a/wgpu-core/src/command/timestamp_writes.rs +++ b/wgpu-core/src/command/timestamp_writes.rs @@ -5,21 +5,13 @@ use crate::id; /// Describes the writing of timestamp values in a render or compute pass. #[derive(Clone, Debug, PartialEq, Eq)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] -pub struct PassTimestampWrites { +pub struct PassTimestampWrites { /// The query set to write the timestamps to. - pub query_set: id::QuerySetId, + pub query_set: QS, /// The index of the query set at which a start timestamp of this pass is written, if any. pub beginning_of_pass_write_index: Option, /// The index of the query set at which an end timestamp of this pass is written, if any. pub end_of_pass_write_index: Option, } -/// Describes the writing of timestamp values in a render or compute pass with the query set resolved. -pub struct ArcPassTimestampWrites { - /// The query set to write the timestamps to. - pub query_set: Arc, - /// The index of the query set at which a start timestamp of this pass is written, if any. - pub beginning_of_pass_write_index: Option, - /// The index of the query set at which an end timestamp of this pass is written, if any. 
- pub end_of_pass_write_index: Option, -} +pub type ArcPassTimestampWrites = PassTimestampWrites>; diff --git a/wgpu-core/src/command/transfer.rs b/wgpu-core/src/command/transfer.rs index 291c44bd2c..1113e9b3eb 100644 --- a/wgpu-core/src/command/transfer.rs +++ b/wgpu-core/src/command/transfer.rs @@ -16,12 +16,11 @@ use crate::{ MissingTextureUsageError, ParentDevice, Texture, TextureErrorDimension, }, snatch::SnatchGuard, - track::TextureSelector, }; use arrayvec::ArrayVec; use thiserror::Error; -use wgt::{BufferAddress, BufferUsages, Extent3d, TextureUsages}; +use wgt::{BufferAddress, BufferUsages, Extent3d, TextureSelector, TextureUsages}; use std::sync::Arc; @@ -576,7 +575,7 @@ impl Global { let src_pending = cmd_buf_data .trackers .buffers - .set_single(&src_buffer, hal::BufferUses::COPY_SRC); + .set_single(&src_buffer, wgt::BufferUses::COPY_SRC); let src_raw = src_buffer.try_raw(&snatch_guard)?; src_buffer @@ -592,7 +591,7 @@ impl Global { let dst_pending = cmd_buf_data .trackers .buffers - .set_single(&dst_buffer, hal::BufferUses::COPY_DST); + .set_single(&dst_buffer, wgt::BufferUses::COPY_DST); let dst_raw = dst_buffer.try_raw(&snatch_guard)?; dst_buffer @@ -767,7 +766,7 @@ impl Global { let src_pending = cmd_buf_data .trackers .buffers - .set_single(&src_buffer, hal::BufferUses::COPY_SRC); + .set_single(&src_buffer, wgt::BufferUses::COPY_SRC); let src_raw = src_buffer.try_raw(&snatch_guard)?; src_buffer @@ -778,7 +777,7 @@ impl Global { let dst_pending = cmd_buf_data.trackers.textures.set_single( &dst_texture, dst_range, - hal::TextureUses::COPY_DST, + wgt::TextureUses::COPY_DST, ); let dst_raw = dst_texture.try_raw(&snatch_guard)?; dst_texture @@ -916,7 +915,7 @@ impl Global { let src_pending = cmd_buf_data.trackers.textures.set_single( &src_texture, src_range, - hal::TextureUses::COPY_SRC, + wgt::TextureUses::COPY_SRC, ); let src_raw = src_texture.try_raw(&snatch_guard)?; src_texture @@ -946,7 +945,7 @@ impl Global { let dst_pending = cmd_buf_data .trackers .buffers - .set_single(&dst_buffer, hal::BufferUses::COPY_DST); + .set_single(&dst_buffer, wgt::BufferUses::COPY_DST); let dst_raw = dst_buffer.try_raw(&snatch_guard)?; dst_buffer @@ -1010,7 +1009,7 @@ impl Global { cmd_buf_raw.transition_textures(&src_barrier); cmd_buf_raw.copy_texture_to_buffer( src_raw, - hal::TextureUses::COPY_SRC, + wgt::TextureUses::COPY_SRC, dst_raw, ®ions, ); @@ -1125,7 +1124,7 @@ impl Global { let src_pending = cmd_buf_data.trackers.textures.set_single( &src_texture, src_range, - hal::TextureUses::COPY_SRC, + wgt::TextureUses::COPY_SRC, ); let src_raw = src_texture.try_raw(&snatch_guard)?; src_texture @@ -1141,7 +1140,7 @@ impl Global { let dst_pending = cmd_buf_data.trackers.textures.set_single( &dst_texture, dst_range, - hal::TextureUses::COPY_DST, + wgt::TextureUses::COPY_DST, ); let dst_raw = dst_texture.try_raw(&snatch_guard)?; dst_texture @@ -1173,7 +1172,7 @@ impl Global { cmd_buf_raw.transition_textures(&barriers); cmd_buf_raw.copy_texture_to_texture( src_raw, - hal::TextureUses::COPY_SRC, + wgt::TextureUses::COPY_SRC, dst_raw, ®ions, ); diff --git a/wgpu-core/src/command/transition_resources.rs b/wgpu-core/src/command/transition_resources.rs new file mode 100644 index 0000000000..794343e27f --- /dev/null +++ b/wgpu-core/src/command/transition_resources.rs @@ -0,0 +1,93 @@ +use thiserror::Error; + +use crate::{ + command::CommandBuffer, + device::DeviceError, + global::Global, + id::{BufferId, CommandEncoderId, TextureId}, + resource::{InvalidResourceError, ParentDevice}, + 
track::ResourceUsageCompatibilityError, +}; + +use super::CommandEncoderError; + +impl Global { + pub fn command_encoder_transition_resources( + &self, + command_encoder_id: CommandEncoderId, + buffer_transitions: impl Iterator>, + texture_transitions: impl Iterator>, + ) -> Result<(), TransitionResourcesError> { + profiling::scope!("CommandEncoder::transition_resources"); + + let hub = &self.hub; + + // Lock command encoder for recording + let cmd_buf = hub + .command_buffers + .get(command_encoder_id.into_command_buffer_id()); + let mut cmd_buf_data = cmd_buf.data.lock(); + let mut cmd_buf_data_guard = cmd_buf_data.record()?; + let cmd_buf_data = &mut *cmd_buf_data_guard; + + // Get and lock device + let device = &cmd_buf.device; + device.check_is_valid()?; + let snatch_guard = &device.snatchable_lock.read(); + + let mut usage_scope = device.new_usage_scope(); + let indices = &device.tracker_indices; + usage_scope.buffers.set_size(indices.buffers.size()); + usage_scope.textures.set_size(indices.textures.size()); + + // Process buffer transitions + for buffer_transition in buffer_transitions { + let buffer = hub.buffers.get(buffer_transition.buffer).get()?; + buffer.same_device_as(cmd_buf.as_ref())?; + + usage_scope + .buffers + .merge_single(&buffer, buffer_transition.state)?; + } + + // Process texture transitions + for texture_transition in texture_transitions { + let texture = hub.textures.get(texture_transition.texture).get()?; + texture.same_device_as(cmd_buf.as_ref())?; + + unsafe { + usage_scope.textures.merge_single( + &texture, + texture_transition.selector, + texture_transition.state, + ) + }?; + } + + // Record any needed barriers based on tracker data + let cmd_buf_raw = cmd_buf_data.encoder.open()?; + CommandBuffer::insert_barriers_from_scope( + cmd_buf_raw, + &mut cmd_buf_data.trackers, + &usage_scope, + snatch_guard, + ); + cmd_buf_data_guard.mark_successful(); + + Ok(()) + } +} + +/// Error encountered while attempting to perform [`Global::command_encoder_transition_resources`]. 
+#[derive(Clone, Debug, Error)] +#[non_exhaustive] +pub enum TransitionResourcesError { + #[error(transparent)] + Device(#[from] DeviceError), + #[error(transparent)] + Encoder(#[from] CommandEncoderError), + #[error(transparent)] + InvalidResource(#[from] InvalidResourceError), + #[error(transparent)] + ResourceUsage(#[from] ResourceUsageCompatibilityError), +} diff --git a/wgpu-core/src/conv.rs b/wgpu-core/src/conv.rs index 27eaff6039..5216ad0f69 100644 --- a/wgpu-core/src/conv.rs +++ b/wgpu-core/src/conv.rs @@ -51,54 +51,54 @@ pub fn is_valid_external_image_copy_dst_texture_format(format: wgt::TextureForma } } -pub fn map_buffer_usage(usage: wgt::BufferUsages) -> hal::BufferUses { - let mut u = hal::BufferUses::empty(); +pub fn map_buffer_usage(usage: wgt::BufferUsages) -> wgt::BufferUses { + let mut u = wgt::BufferUses::empty(); u.set( - hal::BufferUses::MAP_READ, + wgt::BufferUses::MAP_READ, usage.contains(wgt::BufferUsages::MAP_READ), ); u.set( - hal::BufferUses::MAP_WRITE, + wgt::BufferUses::MAP_WRITE, usage.contains(wgt::BufferUsages::MAP_WRITE), ); u.set( - hal::BufferUses::COPY_SRC, + wgt::BufferUses::COPY_SRC, usage.contains(wgt::BufferUsages::COPY_SRC), ); u.set( - hal::BufferUses::COPY_DST, + wgt::BufferUses::COPY_DST, usage.contains(wgt::BufferUsages::COPY_DST), ); u.set( - hal::BufferUses::INDEX, + wgt::BufferUses::INDEX, usage.contains(wgt::BufferUsages::INDEX), ); u.set( - hal::BufferUses::VERTEX, + wgt::BufferUses::VERTEX, usage.contains(wgt::BufferUsages::VERTEX), ); u.set( - hal::BufferUses::UNIFORM, + wgt::BufferUses::UNIFORM, usage.contains(wgt::BufferUsages::UNIFORM), ); u.set( - hal::BufferUses::STORAGE_READ_ONLY | hal::BufferUses::STORAGE_READ_WRITE, + wgt::BufferUses::STORAGE_READ_ONLY | wgt::BufferUses::STORAGE_READ_WRITE, usage.contains(wgt::BufferUsages::STORAGE), ); u.set( - hal::BufferUses::INDIRECT, + wgt::BufferUses::INDIRECT, usage.contains(wgt::BufferUsages::INDIRECT), ); u.set( - hal::BufferUses::QUERY_RESOLVE, + wgt::BufferUses::QUERY_RESOLVE, usage.contains(wgt::BufferUsages::QUERY_RESOLVE), ); u.set( - hal::BufferUses::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT, + wgt::BufferUses::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT, usage.contains(wgt::BufferUsages::BLAS_INPUT), ); u.set( - hal::BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT, + wgt::BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT, usage.contains(wgt::BufferUsages::TLAS_INPUT), ); u @@ -108,45 +108,45 @@ pub fn map_texture_usage( usage: wgt::TextureUsages, aspect: hal::FormatAspects, flags: wgt::TextureFormatFeatureFlags, -) -> hal::TextureUses { - let mut u = hal::TextureUses::empty(); +) -> wgt::TextureUses { + let mut u = wgt::TextureUses::empty(); u.set( - hal::TextureUses::COPY_SRC, + wgt::TextureUses::COPY_SRC, usage.contains(wgt::TextureUsages::COPY_SRC), ); u.set( - hal::TextureUses::COPY_DST, + wgt::TextureUses::COPY_DST, usage.contains(wgt::TextureUsages::COPY_DST), ); u.set( - hal::TextureUses::RESOURCE, + wgt::TextureUses::RESOURCE, usage.contains(wgt::TextureUsages::TEXTURE_BINDING), ); if usage.contains(wgt::TextureUsages::STORAGE_BINDING) { u.set( - hal::TextureUses::STORAGE_READ_ONLY, + wgt::TextureUses::STORAGE_READ_ONLY, flags.contains(wgt::TextureFormatFeatureFlags::STORAGE_READ_ONLY), ); u.set( - hal::TextureUses::STORAGE_WRITE_ONLY, + wgt::TextureUses::STORAGE_WRITE_ONLY, flags.contains(wgt::TextureFormatFeatureFlags::STORAGE_WRITE_ONLY), ); u.set( - hal::TextureUses::STORAGE_READ_WRITE, + wgt::TextureUses::STORAGE_READ_WRITE, 
flags.contains(wgt::TextureFormatFeatureFlags::STORAGE_READ_WRITE), ); } let is_color = aspect.contains(hal::FormatAspects::COLOR); u.set( - hal::TextureUses::COLOR_TARGET, + wgt::TextureUses::COLOR_TARGET, usage.contains(wgt::TextureUsages::RENDER_ATTACHMENT) && is_color, ); u.set( - hal::TextureUses::DEPTH_STENCIL_READ | hal::TextureUses::DEPTH_STENCIL_WRITE, + wgt::TextureUses::DEPTH_STENCIL_READ | wgt::TextureUses::DEPTH_STENCIL_WRITE, usage.contains(wgt::TextureUsages::RENDER_ATTACHMENT) && !is_color, ); u.set( - hal::TextureUses::STORAGE_ATOMIC, + wgt::TextureUses::STORAGE_ATOMIC, usage.contains(wgt::TextureUsages::STORAGE_ATOMIC), ); u @@ -155,14 +155,14 @@ pub fn map_texture_usage( pub fn map_texture_usage_for_texture( desc: &TextureDescriptor, format_features: &TextureFormatFeatures, -) -> hal::TextureUses { +) -> wgt::TextureUses { // Enforce having COPY_DST/DEPTH_STENCIL_WRITE/COLOR_TARGET otherwise we // wouldn't be able to initialize the texture. map_texture_usage(desc.usage, desc.format.into(), format_features.flags) | if desc.format.is_depth_stencil_format() { - hal::TextureUses::DEPTH_STENCIL_WRITE + wgt::TextureUses::DEPTH_STENCIL_WRITE } else if desc.usage.contains(wgt::TextureUsages::COPY_DST) { - hal::TextureUses::COPY_DST // (set already) + wgt::TextureUses::COPY_DST // (set already) } else { // Use COPY_DST only if we can't use COLOR_TARGET if format_features @@ -171,42 +171,42 @@ pub fn map_texture_usage_for_texture( && desc.dimension == wgt::TextureDimension::D2 // Render targets dimension must be 2d { - hal::TextureUses::COLOR_TARGET + wgt::TextureUses::COLOR_TARGET } else { - hal::TextureUses::COPY_DST + wgt::TextureUses::COPY_DST } } } -pub fn map_texture_usage_from_hal(uses: hal::TextureUses) -> wgt::TextureUsages { +pub fn map_texture_usage_from_hal(uses: wgt::TextureUses) -> wgt::TextureUsages { let mut u = wgt::TextureUsages::empty(); u.set( wgt::TextureUsages::COPY_SRC, - uses.contains(hal::TextureUses::COPY_SRC), + uses.contains(wgt::TextureUses::COPY_SRC), ); u.set( wgt::TextureUsages::COPY_DST, - uses.contains(hal::TextureUses::COPY_DST), + uses.contains(wgt::TextureUses::COPY_DST), ); u.set( wgt::TextureUsages::TEXTURE_BINDING, - uses.contains(hal::TextureUses::RESOURCE), + uses.contains(wgt::TextureUses::RESOURCE), ); u.set( wgt::TextureUsages::STORAGE_BINDING, uses.intersects( - hal::TextureUses::STORAGE_READ_ONLY - | hal::TextureUses::STORAGE_WRITE_ONLY - | hal::TextureUses::STORAGE_READ_WRITE, + wgt::TextureUses::STORAGE_READ_ONLY + | wgt::TextureUses::STORAGE_WRITE_ONLY + | wgt::TextureUses::STORAGE_READ_WRITE, ), ); u.set( wgt::TextureUsages::RENDER_ATTACHMENT, - uses.contains(hal::TextureUses::COLOR_TARGET), + uses.contains(wgt::TextureUses::COLOR_TARGET), ); u.set( wgt::TextureUsages::STORAGE_ATOMIC, - uses.contains(hal::TextureUses::STORAGE_ATOMIC), + uses.contains(wgt::TextureUses::STORAGE_ATOMIC), ); u } diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs index 763edf2121..e4211ef2f0 100644 --- a/wgpu-core/src/device/queue.rs +++ b/wgpu-core/src/device/queue.rs @@ -73,8 +73,8 @@ impl Queue { .transition_buffers(&[hal::BufferBarrier { buffer: zero_buffer, usage: hal::StateTransition { - from: hal::BufferUses::empty(), - to: hal::BufferUses::COPY_DST, + from: wgt::BufferUses::empty(), + to: wgt::BufferUses::COPY_DST, }, }]); pending_writes @@ -85,8 +85,8 @@ impl Queue { .transition_buffers(&[hal::BufferBarrier { buffer: zero_buffer, usage: hal::StateTransition { - from: hal::BufferUses::COPY_DST, - to: 
hal::BufferUses::COPY_SRC, + from: wgt::BufferUses::COPY_DST, + to: wgt::BufferUses::COPY_SRC, }, }]); } @@ -588,7 +588,7 @@ impl Queue { let mut trackers = self.device.trackers.lock(); trackers .buffers - .set_single(&buffer, hal::BufferUses::COPY_DST) + .set_single(&buffer, wgt::BufferUses::COPY_DST) }; let snatch_guard = self.device.snatchable_lock.read(); @@ -606,8 +606,8 @@ impl Queue { let barriers = iter::once(hal::BufferBarrier { buffer: staging_buffer.raw(), usage: hal::StateTransition { - from: hal::BufferUses::MAP_WRITE, - to: hal::BufferUses::COPY_SRC, + from: wgt::BufferUses::MAP_WRITE, + to: wgt::BufferUses::COPY_SRC, }, }) .chain(transition.map(|pending| pending.into_hal(&buffer, &snatch_guard))) @@ -697,6 +697,8 @@ impl Queue { .map_err(TransferError::from)?; } + let snatch_guard = self.device.snatchable_lock.read(); + let mut pending_writes = self.pending_writes.lock(); let encoder = pending_writes.activate(); @@ -732,7 +734,7 @@ impl Queue { &mut trackers.textures, &self.device.alignments, self.device.zero_buffer.as_ref(), - &self.device.snatchable_lock.read(), + &snatch_guard, ) .map_err(QueueWriteError::from)?; } @@ -742,8 +744,6 @@ impl Queue { } } - let snatch_guard = self.device.snatchable_lock.read(); - let dst_raw = dst.try_raw(&snatch_guard)?; let (block_width, block_height) = dst.desc.format.block_dimensions(); @@ -828,8 +828,8 @@ impl Queue { let buffer_barrier = hal::BufferBarrier { buffer: staging_buffer.raw(), usage: hal::StateTransition { - from: hal::BufferUses::MAP_WRITE, - to: hal::BufferUses::COPY_SRC, + from: wgt::BufferUses::MAP_WRITE, + to: wgt::BufferUses::COPY_SRC, }, }; @@ -837,7 +837,7 @@ impl Queue { let transition = trackers .textures - .set_single(&dst, selector, hal::TextureUses::COPY_DST); + .set_single(&dst, selector, wgt::TextureUses::COPY_DST); let texture_barriers = transition .map(|pending| pending.into_hal(dst_raw)) .collect::>(); @@ -1014,7 +1014,7 @@ impl Queue { let mut trackers = self.device.trackers.lock(); let transitions = trackers .textures - .set_single(&dst, selector, hal::TextureUses::COPY_DST); + .set_single(&dst, selector, wgt::TextureUses::COPY_DST); // `copy_external_image_to_texture` is exclusive to the WebGL backend. // Don't go through the `DynCommandEncoder` abstraction and directly to the WebGL backend. 
@@ -1221,7 +1221,7 @@ impl Queue { unsafe { used_surface_textures - .merge_single(texture, None, hal::TextureUses::PRESENT) + .merge_single(texture, None, wgt::TextureUses::PRESENT) .unwrap() }; } @@ -1532,7 +1532,7 @@ fn validate_command_buffer( if should_extend { unsafe { used_surface_textures - .merge_single(texture, None, hal::TextureUses::PRESENT) + .merge_single(texture, None, wgt::TextureUses::PRESENT) .unwrap(); }; } diff --git a/wgpu-core/src/device/ray_tracing.rs b/wgpu-core/src/device/ray_tracing.rs index a73f3d1578..c240897fc7 100644 --- a/wgpu-core/src/device/ray_tracing.rs +++ b/wgpu-core/src/device/ray_tracing.rs @@ -160,8 +160,8 @@ impl Device { self.raw().create_buffer(&hal::BufferDescriptor { label: Some("(wgpu-core) instances_buffer"), size: instance_buffer_size as u64, - usage: hal::BufferUses::COPY_DST - | hal::BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT, + usage: wgt::BufferUses::COPY_DST + | wgt::BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT, memory_flags: hal::MemoryFlags::PREFER_COHERENT, }) } diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs index 1381d722b7..b33fb78214 100644 --- a/wgpu-core/src/device/resource.rs +++ b/wgpu-core/src/device/resource.rs @@ -23,19 +23,18 @@ use crate::{ }, resource_log, snatch::{SnatchGuard, SnatchLock, Snatchable}, - track::{ - BindGroupStates, DeviceTracker, TextureSelector, TrackerIndexAllocators, UsageScope, - UsageScopePool, - }, + track::{BindGroupStates, DeviceTracker, TrackerIndexAllocators, UsageScope, UsageScopePool}, validation::{self, validate_color_attachment_bytes_per_sample}, weak_vec::WeakVec, FastHashMap, LabelHelpers, }; use arrayvec::ArrayVec; +use bitflags::Flags; use smallvec::SmallVec; use wgt::{ - math::align_to, DeviceLostReason, TextureFormat, TextureSampleType, TextureViewDimension, + math::align_to, DeviceLostReason, TextureFormat, TextureSampleType, TextureSelector, + TextureViewDimension, }; use crate::resource::{AccelerationStructure, Tlas}; @@ -193,11 +192,11 @@ impl Device { raw_device: Box, adapter: &Arc, desc: &DeviceDescriptor, - trace_path: Option<&std::path::Path>, + trace_dir_name: Option<&str>, instance_flags: wgt::InstanceFlags, ) -> Result { #[cfg(not(feature = "trace"))] - if let Some(_) = trace_path { + if let Some(_) = trace_dir_name { log::error!("Feature 'trace' is not enabled"); } let fence = unsafe { raw_device.create_fence() }.map_err(DeviceError::from_hal)?; @@ -209,7 +208,7 @@ impl Device { raw_device.create_buffer(&hal::BufferDescriptor { label: hal_label(Some("(wgpu internal) zero init buffer"), instance_flags), size: ZERO_BUFFER_SIZE, - usage: hal::BufferUses::COPY_SRC | hal::BufferUses::COPY_DST, + usage: wgt::BufferUses::COPY_SRC | wgt::BufferUses::COPY_DST, memory_flags: hal::MemoryFlags::empty(), }) } @@ -256,7 +255,7 @@ impl Device { #[cfg(feature = "trace")] trace: Mutex::new( rank::DEVICE_TRACE, - trace_path.and_then(|path| match trace::Trace::new(path) { + trace_dir_name.and_then(|dir_path_name| match trace::Trace::new(dir_path_name) { Ok(mut trace) => { trace.add(trace::Action::Init { desc: desc.clone(), @@ -265,7 +264,7 @@ impl Device { Some(trace) } Err(e) => { - log::error!("Unable to start a trace in '{path:?}': {e}"); + log::error!("Unable to start a trace in '{dir_path_name:?}': {e}"); None } }), @@ -495,9 +494,7 @@ impl Device { self.require_downlevel_flags(wgt::DownlevelFlags::UNRESTRICTED_INDEX_BUFFER)?; } - if desc.usage.is_empty() - || desc.usage | wgt::BufferUsages::all() != wgt::BufferUsages::all() - { + if 
desc.usage.is_empty() || desc.usage.contains_unknown_bits() { return Err(resource::CreateBufferError::InvalidUsage(desc.usage)); } @@ -521,7 +518,7 @@ impl Device { self.require_downlevel_flags(wgt::DownlevelFlags::INDIRECT_EXECUTION)?; // We are going to be reading from it, internally; // when validating the content of the buffer - usage |= hal::BufferUses::STORAGE_READ_ONLY | hal::BufferUses::STORAGE_READ_WRITE; + usage |= wgt::BufferUses::STORAGE_READ_ONLY | wgt::BufferUses::STORAGE_READ_WRITE; } if desc.mapped_at_creation { @@ -530,12 +527,12 @@ impl Device { } if !desc.usage.contains(wgt::BufferUsages::MAP_WRITE) { // we are going to be copying into it, internally - usage |= hal::BufferUses::COPY_DST; + usage |= wgt::BufferUses::COPY_DST; } } else { // We are required to zero out (initialize) all memory. This is done // on demand using clear_buffer which requires write transfer usage! - usage |= hal::BufferUses::COPY_DST; + usage |= wgt::BufferUses::COPY_DST; } let actual_size = if desc.size == 0 { @@ -587,7 +584,7 @@ impl Device { let buffer = Arc::new(buffer); let buffer_use = if !desc.mapped_at_creation { - hal::BufferUses::empty() + wgt::BufferUses::empty() } else if desc.usage.contains(wgt::BufferUsages::MAP_WRITE) { // buffer is mappable, so we are just doing that at start let map_size = buffer.size; @@ -605,7 +602,7 @@ impl Device { range: 0..map_size, host: HostMap::Write, }; - hal::BufferUses::MAP_WRITE + wgt::BufferUses::MAP_WRITE } else { let mut staging_buffer = StagingBuffer::new(self, wgt::BufferSize::new(aligned_size).unwrap())?; @@ -616,7 +613,7 @@ impl Device { buffer.initialization_status.write().drain(0..aligned_size); *buffer.map_state.lock() = resource::BufferMapState::Init { staging_buffer }; - hal::BufferUses::COPY_DST + wgt::BufferUses::COPY_DST }; self.trackers @@ -653,7 +650,7 @@ impl Device { self.trackers .lock() .textures - .insert_single(&texture, hal::TextureUses::UNINITIALIZED); + .insert_single(&texture, wgt::TextureUses::UNINITIALIZED); Ok(texture) } @@ -697,7 +694,7 @@ impl Device { self.trackers .lock() .buffers - .insert_single(&buffer, hal::BufferUses::empty()); + .insert_single(&buffer, wgt::BufferUses::empty()); (Fallible::Valid(buffer), None) } @@ -731,9 +728,7 @@ impl Device { self.check_is_valid()?; - if desc.usage.is_empty() - || desc.usage | wgt::TextureUsages::all() != wgt::TextureUsages::all() - { + if desc.usage.is_empty() || desc.usage.contains_unknown_bits() { return Err(CreateTextureError::InvalidUsage(desc.usage)); } @@ -946,12 +941,12 @@ impl Device { .map_err(|e| self.handle_hal_error(e))?; let clear_mode = if hal_usage - .intersects(hal::TextureUses::DEPTH_STENCIL_WRITE | hal::TextureUses::COLOR_TARGET) + .intersects(wgt::TextureUses::DEPTH_STENCIL_WRITE | wgt::TextureUses::COLOR_TARGET) { let (is_color, usage) = if desc.format.is_depth_stencil_format() { - (false, hal::TextureUses::DEPTH_STENCIL_WRITE) + (false, wgt::TextureUses::DEPTH_STENCIL_WRITE) } else { - (true, hal::TextureUses::COLOR_TARGET) + (true, wgt::TextureUses::COLOR_TARGET) }; let dimension = match desc.dimension { wgt::TextureDimension::D1 => TextureViewDimension::D1, @@ -1025,7 +1020,7 @@ impl Device { self.trackers .lock() .textures - .insert_single(&texture, hal::TextureUses::UNINITIALIZED); + .insert_single(&texture, wgt::TextureUses::UNINITIALIZED); Ok(texture) } @@ -1278,23 +1273,23 @@ impl Device { // filter the usages based on the other criteria let usage = { - let mask_copy = !(hal::TextureUses::COPY_SRC | hal::TextureUses::COPY_DST); + let mask_copy = 
!(wgt::TextureUses::COPY_SRC | wgt::TextureUses::COPY_DST); let mask_dimension = match resolved_dimension { TextureViewDimension::Cube | TextureViewDimension::CubeArray => { - hal::TextureUses::RESOURCE + wgt::TextureUses::RESOURCE } TextureViewDimension::D3 => { - hal::TextureUses::RESOURCE - | hal::TextureUses::STORAGE_READ_ONLY - | hal::TextureUses::STORAGE_WRITE_ONLY - | hal::TextureUses::STORAGE_READ_WRITE + wgt::TextureUses::RESOURCE + | wgt::TextureUses::STORAGE_READ_ONLY + | wgt::TextureUses::STORAGE_WRITE_ONLY + | wgt::TextureUses::STORAGE_READ_WRITE } - _ => hal::TextureUses::all(), + _ => wgt::TextureUses::all(), }; let mask_mip_level = if resolved_mip_level_count == 1 { - hal::TextureUses::all() + wgt::TextureUses::all() } else { - hal::TextureUses::RESOURCE + wgt::TextureUses::RESOURCE }; texture.hal_usage & mask_copy & mask_dimension & mask_mip_level }; @@ -1521,9 +1516,9 @@ impl Device { }; for (_, var) in module.global_variables.iter() { match var.binding { - Some(ref br) if br.group >= self.limits.max_bind_groups => { + Some(br) if br.group >= self.limits.max_bind_groups => { return Err(pipeline::CreateShaderModuleError::InvalidGroupIndex { - bind: br.clone(), + bind: br, group: br.group, limit: self.limits.max_bind_groups, }); @@ -1848,7 +1843,7 @@ impl Device { })?; } - if entry.visibility | wgt::ShaderStages::all() != wgt::ShaderStages::all() { + if entry.visibility.contains_unknown_bits() { return Err( binding_model::CreateBindGroupLayoutError::InvalidVisibility(entry.visibility), ); @@ -1908,6 +1903,9 @@ impl Device { .validate(&self.limits) .map_err(binding_model::CreateBindGroupLayoutError::TooManyBindings)?; + // Validate that binding arrays don't conflict with dynamic offsets. + count_validator.validate_binding_arrays()?; + let bgl = BindGroupLayout { raw: ManuallyDrop::new(raw), device: self.clone(), @@ -1955,15 +1953,15 @@ impl Device { let (pub_usage, internal_use, range_limit) = match binding_ty { wgt::BufferBindingType::Uniform => ( wgt::BufferUsages::UNIFORM, - hal::BufferUses::UNIFORM, + wgt::BufferUses::UNIFORM, self.limits.max_uniform_buffer_binding_size, ), wgt::BufferBindingType::Storage { read_only } => ( wgt::BufferUsages::STORAGE, if read_only { - hal::BufferUses::STORAGE_READ_ONLY + wgt::BufferUses::STORAGE_READ_ONLY } else { - hal::BufferUses::STORAGE_READ_WRITE + wgt::BufferUses::STORAGE_READ_WRITE }, self.limits.max_storage_buffer_binding_size, ), @@ -2438,7 +2436,7 @@ impl Device { decl: &wgt::BindGroupLayoutEntry, view: &TextureView, expected: &'static str, - ) -> Result { + ) -> Result { use crate::binding_model::CreateBindGroupError as Error; if view .desc @@ -2498,7 +2496,7 @@ impl Device { }); } view.check_usage(wgt::TextureUsages::TEXTURE_BINDING)?; - Ok(hal::TextureUses::RESOURCE) + Ok(wgt::TextureUses::RESOURCE) } wgt::BindingType::StorageTexture { access, @@ -2537,7 +2535,7 @@ impl Device { { return Err(Error::StorageWriteNotSupported(view.desc.format)); } - hal::TextureUses::STORAGE_WRITE_ONLY + wgt::TextureUses::STORAGE_WRITE_ONLY } wgt::StorageTextureAccess::ReadOnly => { if !view @@ -2547,7 +2545,7 @@ impl Device { { return Err(Error::StorageReadNotSupported(view.desc.format)); } - hal::TextureUses::STORAGE_READ_ONLY + wgt::TextureUses::STORAGE_READ_ONLY } wgt::StorageTextureAccess::ReadWrite => { if !view @@ -2558,7 +2556,7 @@ impl Device { return Err(Error::StorageReadWriteNotSupported(view.desc.format)); } - hal::TextureUses::STORAGE_READ_WRITE + wgt::TextureUses::STORAGE_READ_WRITE } wgt::StorageTextureAccess::Atomic => { if 
!view @@ -2569,7 +2567,7 @@ impl Device { return Err(Error::StorageAtomicNotSupported(view.desc.format)); } - hal::TextureUses::STORAGE_ATOMIC + wgt::TextureUses::STORAGE_ATOMIC } }; view.check_usage(wgt::TextureUsages::STORAGE_BINDING)?; @@ -2712,8 +2710,8 @@ impl Device { .map(|mut bgl_entry_map| { bgl_entry_map.sort(); match unique_bind_group_layouts.entry(bgl_entry_map) { - std::collections::hash_map::Entry::Occupied(v) => Ok(Arc::clone(v.get())), - std::collections::hash_map::Entry::Vacant(e) => { + hashbrown::hash_map::Entry::Occupied(v) => Ok(Arc::clone(v.get())), + hashbrown::hash_map::Entry::Vacant(e) => { match self.create_bind_group_layout( &None, e.key().clone(), @@ -3074,7 +3072,7 @@ impl Device { if let Some(cs) = cs.as_ref() { target_specified = true; let error = 'error: { - if cs.write_mask | wgt::ColorWrites::all() != wgt::ColorWrites::all() { + if cs.write_mask.contains_unknown_bits() { break 'error Some(pipeline::ColorStateError::InvalidWriteMask( cs.write_mask, )); diff --git a/wgpu-core/src/device/trace.rs b/wgpu-core/src/device/trace.rs index 16902ea865..a5f849cb8c 100644 --- a/wgpu-core/src/device/trace.rs +++ b/wgpu-core/src/device/trace.rs @@ -220,7 +220,8 @@ pub struct Trace { #[cfg(feature = "trace")] impl Trace { - pub fn new(path: &std::path::Path) -> Result<Self, std::io::Error> { + pub fn new(dir_path_name: &str) -> Result<Self, std::io::Error> { + let path = std::path::Path::new(dir_path_name); log::info!("Tracing into '{:?}'", path); let mut file = std::fs::File::create(path.join(FILE_NAME))?; file.write_all(b"[\n")?; diff --git a/wgpu-core/src/hash_utils.rs b/wgpu-core/src/hash_utils.rs index 056c84f539..fa2db6bf27 100644 --- a/wgpu-core/src/hash_utils.rs +++ b/wgpu-core/src/hash_utils.rs @@ -4,10 +4,10 @@ /// HashMap using a fast, non-cryptographic hash algorithm. pub type FastHashMap<K, V> = - std::collections::HashMap<K, V, std::hash::BuildHasherDefault<rustc_hash::FxHasher>>; + hashbrown::HashMap<K, V, std::hash::BuildHasherDefault<rustc_hash::FxHasher>>; /// HashSet using a fast, non-cryptographic hash algorithm. pub type FastHashSet<K> = - std::collections::HashSet<K, std::hash::BuildHasherDefault<rustc_hash::FxHasher>>; + hashbrown::HashSet<K, std::hash::BuildHasherDefault<rustc_hash::FxHasher>>; /// IndexMap using a fast, non-cryptographic hash algorithm.
pub type FastIndexMap = diff --git a/wgpu-core/src/indirect_validation.rs b/wgpu-core/src/indirect_validation.rs index 3045965435..e16828aede 100644 --- a/wgpu-core/src/indirect_validation.rs +++ b/wgpu-core/src/indirect_validation.rs @@ -226,7 +226,7 @@ impl IndirectValidation { let dst_buffer_desc = hal::BufferDescriptor { label: None, size: DST_BUFFER_SIZE.get(), - usage: hal::BufferUses::INDIRECT | hal::BufferUses::STORAGE_READ_WRITE, + usage: wgt::BufferUses::INDIRECT | wgt::BufferUses::STORAGE_READ_WRITE, memory_flags: hal::MemoryFlags::empty(), }; let dst_buffer = diff --git a/wgpu-core/src/init_tracker/texture.rs b/wgpu-core/src/init_tracker/texture.rs index 4bf7278f21..f3cc471aac 100644 --- a/wgpu-core/src/init_tracker/texture.rs +++ b/wgpu-core/src/init_tracker/texture.rs @@ -1,7 +1,8 @@ use super::{InitTracker, MemoryInitKind}; -use crate::{resource::Texture, track::TextureSelector}; +use crate::resource::Texture; use arrayvec::ArrayVec; use std::{ops::Range, sync::Arc}; +use wgt::TextureSelector; #[derive(Debug, Clone)] pub(crate) struct TextureInitRange { diff --git a/wgpu-core/src/instance.rs b/wgpu-core/src/instance.rs index 6b1e721d4e..c19a51d58e 100644 --- a/wgpu-core/src/instance.rs +++ b/wgpu-core/src/instance.rs @@ -1,5 +1,7 @@ +use std::borrow::Cow; use std::sync::Arc; -use std::{borrow::Cow, collections::HashMap}; + +use hashbrown::HashMap; use crate::{ api_log, api_log_debug, @@ -73,12 +75,7 @@ impl Instance { let hal_desc = hal::InstanceDescriptor { name: "wgpu", flags: instance_desc.flags, - dx12_shader_compiler: instance_desc - .backend_options - .dx12 - .shader_compiler - .clone(), - gles_minor_version: instance_desc.backend_options.gl.gles_minor_version, + backend_options: instance_desc.backend_options.clone(), }; use hal::Instance as _; @@ -620,11 +617,17 @@ impl Adapter { hal_device: hal::DynOpenDevice, desc: &DeviceDescriptor, instance_flags: wgt::InstanceFlags, - trace_path: Option<&std::path::Path>, + trace_dir_name: Option<&str>, ) -> Result<(Arc, Arc), RequestDeviceError> { api_log!("Adapter::create_device"); - let device = Device::new(hal_device.device, self, desc, trace_path, instance_flags)?; + let device = Device::new( + hal_device.device, + self, + desc, + trace_dir_name, + instance_flags, + )?; let device = Arc::new(device); let queue = Queue::new(device.clone(), hal_device.queue)?; @@ -639,7 +642,7 @@ impl Adapter { self: &Arc, desc: &DeviceDescriptor, instance_flags: wgt::InstanceFlags, - trace_path: Option<&std::path::Path>, + trace_dir_name: Option<&str>, ) -> Result<(Arc, Arc), RequestDeviceError> { // Verify all features were exposed by the adapter if !self.raw.features.contains(desc.required_features) { @@ -686,7 +689,7 @@ impl Adapter { } .map_err(DeviceError::from_hal)?; - self.create_device_and_queue_from_hal(open, desc, instance_flags, trace_path) + self.create_device_and_queue_from_hal(open, desc, instance_flags, trace_dir_name) } } @@ -927,7 +930,7 @@ impl Global { &self, adapter_id: AdapterId, desc: &DeviceDescriptor, - trace_path: Option<&std::path::Path>, + trace_dir_name: Option<&str>, device_id_in: Option, queue_id_in: Option, ) -> Result<(DeviceId, QueueId), RequestDeviceError> { @@ -939,7 +942,7 @@ impl Global { let adapter = self.hub.adapters.get(adapter_id); let (device, queue) = - adapter.create_device_and_queue(desc, self.instance.flags, trace_path)?; + adapter.create_device_and_queue(desc, self.instance.flags, trace_dir_name)?; let device_id = device_fid.assign(device); resource_log!("Created Device {:?}", device_id); @@ 
-959,7 +962,7 @@ impl Global { adapter_id: AdapterId, hal_device: hal::DynOpenDevice, desc: &DeviceDescriptor, - trace_path: Option<&std::path::Path>, + trace_dir_name: Option<&str>, device_id_in: Option, queue_id_in: Option, ) -> Result<(DeviceId, QueueId), RequestDeviceError> { @@ -973,7 +976,7 @@ impl Global { hal_device, desc, self.instance.flags, - trace_path, + trace_dir_name, )?; let device_id = devices_fid.assign(device); diff --git a/wgpu-core/src/lib.rs b/wgpu-core/src/lib.rs index 4c2ea81490..47aaa87a51 100644 --- a/wgpu-core/src/lib.rs +++ b/wgpu-core/src/lib.rs @@ -56,6 +56,9 @@ // (the only reason to use wgpu-core on the web in the first place) that have atomics enabled. #![cfg_attr(not(send_sync), allow(clippy::arc_with_non_send_sync))] +extern crate wgpu_hal as hal; +extern crate wgpu_types as wgt; + pub mod binding_model; pub mod command; mod conv; diff --git a/wgpu-core/src/pool.rs b/wgpu-core/src/pool.rs index d14b8162e3..375b36c32e 100644 --- a/wgpu-core/src/pool.rs +++ b/wgpu-core/src/pool.rs @@ -1,9 +1,9 @@ use std::{ - collections::{hash_map::Entry, HashMap}, hash::Hash, sync::{Arc, Weak}, }; +use hashbrown::{hash_map::Entry, HashMap}; use once_cell::sync::OnceCell; use crate::lock::{rank, Mutex}; diff --git a/wgpu-core/src/present.rs b/wgpu-core/src/present.rs index f1b01a1a21..1646111635 100644 --- a/wgpu-core/src/present.rs +++ b/wgpu-core/src/present.rs @@ -89,8 +89,8 @@ pub enum ConfigureSurfaceError { }, #[error("Requested usage {requested:?} is not in the list of supported usages: {available:?}")] UnsupportedUsage { - requested: hal::TextureUses, - available: hal::TextureUses, + requested: wgt::TextureUses, + available: wgt::TextureUses, }, } @@ -170,7 +170,7 @@ impl Surface { ), format: config.format, dimension: wgt::TextureViewDimension::D2, - usage: hal::TextureUses::COLOR_TARGET, + usage: wgt::TextureUses::COLOR_TARGET, range: wgt::ImageSubresourceRange::default(), }; let clear_view = unsafe { @@ -200,7 +200,7 @@ impl Surface { .trackers .lock() .textures - .insert_single(&texture, hal::TextureUses::UNINITIALIZED); + .insert_single(&texture, wgt::TextureUses::UNINITIALIZED); if present.acquired_texture.is_some() { return Err(SurfaceError::AlreadyAcquired); diff --git a/wgpu-core/src/ray_tracing.rs b/wgpu-core/src/ray_tracing.rs index 5681a8ac40..59968cd27b 100644 --- a/wgpu-core/src/ray_tracing.rs +++ b/wgpu-core/src/ray_tracing.rs @@ -181,7 +181,7 @@ pub struct TlasBuildEntry { pub struct TlasInstance<'a> { pub blas_id: BlasId, pub transform: &'a [f32; 12], - pub custom_index: u32, + pub custom_data: u32, pub mask: u8, } @@ -249,7 +249,7 @@ pub struct TraceBlasBuildEntry { pub struct TraceTlasInstance { pub blas_id: BlasId, pub transform: [f32; 12], - pub custom_index: u32, + pub custom_data: u32, pub mask: u8, } diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs index 9c2252e665..73435fc312 100644 --- a/wgpu-core/src/resource.rs +++ b/wgpu-core/src/resource.rs @@ -8,16 +8,21 @@ use crate::{ }, global::Global, hal_api::HalApi, - id::{AdapterId, BufferId, CommandEncoderId, DeviceId, SurfaceId, TextureId, TextureViewId}, + id::{ + AdapterId, BufferId, CommandEncoderId, DeviceId, QueueId, SurfaceId, TextureId, + TextureViewId, + }, init_tracker::{BufferInitTracker, TextureInitTracker}, lock::{rank, Mutex, RwLock}, resource_log, snatch::{SnatchGuard, Snatchable}, - track::{SharedTrackerIndexAllocator, TextureSelector, TrackerIndex}, + track::{SharedTrackerIndexAllocator, TrackerIndex}, weak_vec::WeakVec, Label, LabelHelpers, SubmissionIndex, 
}; +use wgt::TextureSelector; + use smallvec::SmallVec; use thiserror::Error; @@ -455,8 +460,8 @@ impl Buffer { } let (pub_usage, internal_use) = match op.host { - HostMap::Read => (wgt::BufferUsages::MAP_READ, hal::BufferUses::MAP_READ), - HostMap::Write => (wgt::BufferUsages::MAP_WRITE, hal::BufferUses::MAP_WRITE), + HostMap::Read => (wgt::BufferUsages::MAP_READ, wgt::BufferUses::MAP_READ), + HostMap::Write => (wgt::BufferUsages::MAP_WRITE, wgt::BufferUses::MAP_WRITE), }; if let Err(e) = self.check_usage(pub_usage) { @@ -634,15 +639,15 @@ impl Buffer { let transition_src = hal::BufferBarrier { buffer: staging_buffer.raw(), usage: hal::StateTransition { - from: hal::BufferUses::MAP_WRITE, - to: hal::BufferUses::COPY_SRC, + from: wgt::BufferUses::MAP_WRITE, + to: wgt::BufferUses::COPY_SRC, }, }; let transition_dst = hal::BufferBarrier::<dyn hal::DynBuffer> { buffer: raw_buf, usage: hal::StateTransition { - from: hal::BufferUses::empty(), - to: hal::BufferUses::COPY_DST, + from: wgt::BufferUses::empty(), + to: wgt::BufferUses::COPY_DST, }, }; let mut pending_writes = queue.pending_writes.lock(); @@ -856,7 +861,7 @@ impl StagingBuffer { let stage_desc = hal::BufferDescriptor { label: crate::hal_label(Some("(wgpu internal) Staging"), device.instance_flags), size: size.get(), - usage: hal::BufferUses::MAP_WRITE | hal::BufferUses::COPY_SRC, + usage: wgt::BufferUses::MAP_WRITE | wgt::BufferUses::COPY_SRC, memory_flags: hal::MemoryFlags::TRANSIENT, }; @@ -1010,7 +1015,7 @@ pub struct Texture { pub(crate) inner: Snatchable<TextureInner>, pub(crate) device: Arc<Device>, pub(crate) desc: wgt::TextureDescriptor<(), Vec<wgt::TextureFormat>>, - pub(crate) hal_usage: hal::TextureUses, + pub(crate) hal_usage: wgt::TextureUses, pub(crate) format_features: wgt::TextureFormatFeatures, pub(crate) initialization_status: RwLock<TextureInitTracker>, pub(crate) full_range: TextureSelector, @@ -1026,7 +1031,7 @@ impl Texture { pub(crate) fn new( device: &Arc<Device>, inner: TextureInner, - hal_usage: hal::TextureUses, + hal_usage: wgt::TextureUses, desc: &TextureDescriptor, format_features: wgt::TextureFormatFeatures, clear_mode: TextureClearMode, @@ -1388,6 +1393,21 @@ impl Global { hal_command_encoder_callback(None) } } + + /// # Safety + /// + /// - The raw queue handle must not be manually destroyed + pub unsafe fn queue_as_hal<A: HalApi, F, R>(&self, id: QueueId, hal_queue_callback: F) -> R + where + F: FnOnce(Option<&A::Queue>) -> R, + { + profiling::scope!("Queue::as_hal"); + + let queue = self.hub.queues.get(id); + let hal_queue = queue.raw().as_any().downcast_ref(); + + hal_queue_callback(hal_queue) + } } /// A texture that has been marked as destroyed and is staged for actual deletion soon.
diff --git a/wgpu-core/src/scratch.rs b/wgpu-core/src/scratch.rs index dcd2d28fb4..a8242be075 100644 --- a/wgpu-core/src/scratch.rs +++ b/wgpu-core/src/scratch.rs @@ -1,8 +1,8 @@ use crate::device::{Device, DeviceError}; use crate::resource_log; -use hal::BufferUses; use std::mem::ManuallyDrop; use std::sync::Arc; +use wgt::BufferUses; #[derive(Debug)] pub struct ScratchBuffer { diff --git a/wgpu-core/src/track/buffer.rs b/wgpu-core/src/track/buffer.rs index cfd166070d..ba23dbcd6e 100644 --- a/wgpu-core/src/track/buffer.rs +++ b/wgpu-core/src/track/buffer.rs @@ -15,8 +15,8 @@ use crate::{ ResourceUsageCompatibilityError, ResourceUses, }, }; -use hal::{BufferBarrier, BufferUses}; -use wgt::{strict_assert, strict_assert_eq}; +use hal::BufferBarrier; +use wgt::{strict_assert, strict_assert_eq, BufferUses}; impl ResourceUses for BufferUses { const EXCLUSIVE: Self = Self::EXCLUSIVE; diff --git a/wgpu-core/src/track/mod.rs b/wgpu-core/src/track/mod.rs index a0b91be5e6..3cd9a4ecba 100644 --- a/wgpu-core/src/track/mod.rs +++ b/wgpu-core/src/track/mod.rs @@ -118,8 +118,8 @@ pub(crate) use buffer::{ use metadata::{ResourceMetadata, ResourceMetadataProvider}; pub(crate) use stateless::StatelessTracker; pub(crate) use texture::{ - DeviceTextureTracker, TextureSelector, TextureTracker, TextureTrackerSetSingle, - TextureUsageScope, TextureViewBindGroupState, + DeviceTextureTracker, TextureTracker, TextureTrackerSetSingle, TextureUsageScope, + TextureViewBindGroupState, }; use wgt::strict_assert_ne; @@ -256,9 +256,9 @@ pub(crate) struct PendingTransition<S: ResourceUses> { pub usage: hal::StateTransition<S>, } -pub(crate) type PendingTransitionList = Vec<PendingTransition<hal::TextureUses>>; +pub(crate) type PendingTransitionList = Vec<PendingTransition<wgt::TextureUses>>; -impl PendingTransition<hal::BufferUses> { +impl PendingTransition<wgt::BufferUses> { /// Produce the hal barrier corresponding to the transition. pub fn into_hal<'a>( self, @@ -273,15 +273,15 @@ impl PendingTransition<hal::BufferUses> { } } -impl PendingTransition<hal::TextureUses> { +impl PendingTransition<wgt::TextureUses> { /// Produce the hal barrier corresponding to the transition. pub fn into_hal( self, texture: &dyn hal::DynTexture, ) -> hal::TextureBarrier<'_, dyn hal::DynTexture> { // These showing up in a barrier is always a bug - strict_assert_ne!(self.usage.from, hal::TextureUses::UNKNOWN); - strict_assert_ne!(self.usage.to, hal::TextureUses::UNKNOWN); + strict_assert_ne!(self.usage.from, wgt::TextureUses::UNKNOWN); + strict_assert_ne!(self.usage.to, wgt::TextureUses::UNKNOWN); let mip_count = self.selector.mips.end - self.selector.mips.start; strict_assert_ne!(mip_count, 0); @@ -341,7 +341,7 @@ pub enum ResourceUsageCompatibilityError { #[error("Attempted to use {res} with {invalid_use}.")] Buffer { res: ResourceErrorIdent, - invalid_use: InvalidUse<hal::BufferUses>, + invalid_use: InvalidUse<wgt::BufferUses>, }, #[error( "Attempted to use {res} (mips {mip_levels:?} layers {array_layers:?}) with {invalid_use}."
@@ -350,15 +350,15 @@ pub enum ResourceUsageCompatibilityError { res: ResourceErrorIdent, mip_levels: ops::Range, array_layers: ops::Range, - invalid_use: InvalidUse, + invalid_use: InvalidUse, }, } impl ResourceUsageCompatibilityError { fn from_buffer( buffer: &resource::Buffer, - current_state: hal::BufferUses, - new_state: hal::BufferUses, + current_state: wgt::BufferUses, + new_state: wgt::BufferUses, ) -> Self { Self::Buffer { res: buffer.error_ident(), @@ -371,9 +371,9 @@ impl ResourceUsageCompatibilityError { fn from_texture( texture: &resource::Texture, - selector: TextureSelector, - current_state: hal::TextureUses, - new_state: hal::TextureUses, + selector: wgt::TextureSelector, + current_state: wgt::TextureUses, + new_state: wgt::TextureUses, ) -> Self { Self::Texture { res: texture.error_ident(), @@ -641,7 +641,7 @@ impl Tracker { /// bind group as a source of which IDs to look at. The bind groups /// must have first been added to the usage scope. /// - /// Only stateful things are merged in herell other resources are owned + /// Only stateful things are merged in here, all other resources are owned /// indirectly by the bind group. /// /// # Safety diff --git a/wgpu-core/src/track/texture.rs b/wgpu-core/src/track/texture.rs index 0a9a5f5489..268e81c4b2 100644 --- a/wgpu-core/src/track/texture.rs +++ b/wgpu-core/src/track/texture.rs @@ -27,27 +27,19 @@ use crate::{ ResourceUsageCompatibilityError, ResourceUses, }, }; -use hal::{TextureBarrier, TextureUses}; +use hal::TextureBarrier; use arrayvec::ArrayVec; use naga::FastHashMap; -use wgt::{strict_assert, strict_assert_eq}; +use wgt::{strict_assert, strict_assert_eq, TextureSelector, TextureUses}; use std::{ iter, - ops::Range, sync::{Arc, Weak}, vec::Drain, }; -/// Specifies a particular set of subresources in a texture. 
-#[derive(Clone, Debug, PartialEq, Eq)] -pub struct TextureSelector { - pub mips: Range, - pub layers: Range, -} - impl ResourceUses for TextureUses { const EXCLUSIVE: Self = Self::EXCLUSIVE; diff --git a/wgpu-core/src/validation.rs b/wgpu-core/src/validation.rs index 46df96cf38..22da19f5ba 100644 --- a/wgpu-core/src/validation.rs +++ b/wgpu-core/src/validation.rs @@ -1,6 +1,7 @@ use crate::{device::bgl, resource::InvalidResourceError, FastHashMap, FastHashSet}; use arrayvec::ArrayVec; -use std::{collections::hash_map::Entry, fmt}; +use hashbrown::hash_map::Entry; +use std::fmt; use thiserror::Error; use wgt::{BindGroupLayoutEntry, BindingType}; @@ -924,7 +925,7 @@ impl Interface { let mut resource_mapping = FastHashMap::default(); for (var_handle, var) in module.global_variables.iter() { let bind = match var.binding { - Some(ref br) => br.clone(), + Some(br) => br, _ => continue, }; let naga_ty = &module.types[var.ty].inner; @@ -1063,7 +1064,7 @@ impl Interface { BindingLayoutSource::Provided(layouts) => { // update the required binding size for this buffer if let ResourceType::Buffer { size } = res.ty { - match shader_binding_sizes.entry(res.bind.clone()) { + match shader_binding_sizes.entry(res.bind) { Entry::Occupied(e) => { *e.into_mut() = size.max(*e.get()); } @@ -1123,7 +1124,7 @@ impl Interface { } }; if let Err(error) = result { - return Err(StageError::Binding(res.bind.clone(), error)); + return Err(StageError::Binding(res.bind, error)); } } @@ -1164,8 +1165,8 @@ impl Interface { if let Some(error) = error { return Err(StageError::Filtering { - texture: texture_bind.clone(), - sampler: sampler_bind.clone(), + texture: *texture_bind, + sampler: *sampler_bind, error, }); } diff --git a/wgpu-hal/Cargo.toml b/wgpu-hal/Cargo.toml index 7287246dfc..e3056e2c77 100644 --- a/wgpu-hal/Cargo.toml +++ b/wgpu-hal/Cargo.toml @@ -34,6 +34,9 @@ targets = [ # Cargo machete can't check build.rs dependencies. See https://github.com/bnjbvr/cargo-machete/issues/100 ignored = ["cfg_aliases"] +[lints.rust] +unexpected_cfgs = { level = "warn", check-cfg = ['cfg(web_sys_unstable_apis)'] } + [lib] [features] @@ -57,12 +60,14 @@ vulkan = [ ] gles = [ "naga/glsl-out", + "once_cell/std", "dep:bytemuck", "dep:glow", "dep:glutin_wgl_sys", "dep:khronos-egl", "dep:libloading", "dep:ndk-sys", + "dep:once_cell", "windows/Win32_Graphics_OpenGL", "windows/Win32_Graphics_Gdi", "windows/Win32_System_LibraryLoader", @@ -77,6 +82,7 @@ dx12 = [ "dep:libloading", "dep:range-alloc", "dep:windows-core", + "dep:ordered-float", "gpu-allocator/d3d12", "naga/hlsl-out-if-target-windows", "windows/Win32_Graphics_Direct3D_Fxc", @@ -95,7 +101,9 @@ dx12 = [ ## Enables statically linking DXC. static-dxc = ["dep:mach-dxcompiler-rs"] renderdoc = ["dep:libloading", "dep:renderdoc-sys"] -fragile-send-sync-non-atomic-wasm = ["wgt/fragile-send-sync-non-atomic-wasm"] +fragile-send-sync-non-atomic-wasm = [ + "wgpu-types/fragile-send-sync-non-atomic-wasm", +] # Panic when running into an out-of-memory error (for debugging purposes). # # Only affects the d3d12 and vulkan backends. @@ -109,9 +117,6 @@ device_lost_panic = [] # Only affects the d3d12 and vulkan backends. 
internal_error_panic = [] -[lints.rust] -unexpected_cfgs = { level = "warn", check-cfg = ['cfg(web_sys_unstable_apis)'] } - [[example]] name = "halmark" @@ -119,73 +124,100 @@ name = "halmark" name = "raw-gles" required-features = ["gles"] +##################### +### Platform: All ### +##################### + [dependencies] +naga.workspace = true +wgpu-types.workspace = true + +arrayvec.workspace = true bitflags.workspace = true +hashbrown.workspace = true +log.workspace = true +once_cell = { workspace = true, optional = true } +ordered-float = { workspace = true, optional = true } parking_lot.workspace = true profiling = { workspace = true, default-features = false } raw-window-handle.workspace = true -thiserror.workspace = true -once_cell.workspace = true -ordered-float = { workspace = true, optional = true } - -# backends common -arrayvec.workspace = true rustc-hash.workspace = true -log.workspace = true +thiserror.workspace = true -# backend: Gles +# Backend: GLES bytemuck = { workspace = true, optional = true } glow = { workspace = true, optional = true } -[dependencies.wgt] -package = "wgpu-types" -path = "../wgpu-types" -version = "24.0.0" +######################## +### Platform: Native ### +######################## [target.'cfg(not(target_arch = "wasm32"))'.dependencies] -# backend: Vulkan +# Backend: Vulkan ash = { workspace = true, optional = true } gpu-alloc = { workspace = true, optional = true } gpu-descriptor = { workspace = true, optional = true } smallvec = { workspace = true, optional = true, features = ["union"] } - +# Backend: GLES khronos-egl = { workspace = true, features = ["dynamic"], optional = true } libloading = { workspace = true, optional = true } renderdoc-sys = { workspace = true, optional = true } -[target.'cfg(target_os = "emscripten")'.dependencies] -khronos-egl = { workspace = true, features = ["static", "no-pkg-config"] } -#Note: it's unused by emscripten, but we keep it to have single code base in egl.rs -libloading = { workspace = true, optional = true } +########################## +### Platform: All Unix ### +########################## + +[target.'cfg(unix)'.dependencies] +# Backend: Vulkan +libc.workspace = true + +######################### +### Platform: Windows ### +######################### [target.'cfg(windows)'.dependencies] -# backend: Dx12 and Gles +# Backend: Dx12 and GLES windows = { workspace = true, optional = true } -# backend: Dx12 +windows-core = { workspace = true, optional = true } +# Backend: Dx12 bit-set = { workspace = true, optional = true } range-alloc = { workspace = true, optional = true } gpu-allocator = { workspace = true, optional = true } -# For core macros. This crate is also reexported as windows::core. -windows-core = { workspace = true, optional = true } - -# backend: Gles +# backend: GLES glutin_wgl_sys = { workspace = true, optional = true } +### Platform: x86/x86_64 Windows ### # This doesn't support aarch64. See https://github.com/gfx-rs/wgpu/issues/6860. 
# # ⚠️ Keep in sync with static_dxc cfg in build.rs and cfg_alias in `wgpu` crate ⚠️ [target.'cfg(all(windows, not(target_arch = "aarch64")))'.dependencies] mach-dxcompiler-rs = { workspace = true, optional = true } +####################### +### Platform: Apple ### +####################### + [target.'cfg(target_vendor = "apple")'.dependencies] -# backend: Metal +# Backend: Metal block = { workspace = true, optional = true } - +core-graphics-types.workspace = true metal.workspace = true objc.workspace = true -core-graphics-types.workspace = true + +######################### +### Platform: Android ### +######################### + +[target.'cfg(target_os = "android")'.dependencies] +android_system_properties = { workspace = true, optional = true } +ndk-sys = { workspace = true, optional = true } + +############################# +### Platform: Webassembly ### +############################# [target.'cfg(all(target_arch = "wasm32", not(target_os = "emscripten")))'.dependencies] +# Backend: GLES wasm-bindgen.workspace = true web-sys = { workspace = true, features = [ "default", @@ -196,44 +228,29 @@ web-sys = { workspace = true, features = [ ] } js-sys = { workspace = true, features = ["default"] } -[target.'cfg(unix)'.dependencies] -libc.workspace = true - -[target.'cfg(target_os = "android")'.dependencies] -android_system_properties = { workspace = true, optional = true } -ndk-sys = { workspace = true, optional = true } +############################ +### Platform: Emscripten ### +############################ -[dependencies.naga] -path = "../naga" -version = "24.0.0" +[target.'cfg(target_os = "emscripten")'.dependencies] +# Backend: GLES +khronos-egl = { workspace = true, features = ["static", "no-pkg-config"] } +# Note: it's unused by emscripten, but we keep it to have single code base in egl.rs +libloading = { workspace = true, optional = true } [build-dependencies] cfg_aliases.workspace = true -# DEV dependencies -[dev-dependencies.naga] -path = "../naga" -version = "24.0.0" -features = ["wgsl-in"] - [dev-dependencies] cfg-if.workspace = true env_logger.workspace = true -glam.workspace = true # for ray-traced-triangle example -winit.workspace = true # for "halmark" example +glam.workspace = true # for ray-traced-triangle example +naga = { workspace = true, features = ["wgsl-in"] } +winit.workspace = true # for "halmark" example +### Platform: Windows + MacOS + Linux for "raw-gles" example ### [target.'cfg(not(any(target_arch = "wasm32", target_os = "ios", target_os = "visionos")))'.dev-dependencies] -glutin-winit = { workspace = true, features = [ - "egl", - "wgl", - "wayland", - "x11", -] } # for "raw-gles" example -glutin = { workspace = true, features = [ - "egl", - "wgl", - "wayland", - "x11", -] } # for "raw-gles" example -rwh_05 = { version = "0.5", package = "raw-window-handle" } # temporary compatibility for glutin-winit in "raw-gles" example -winit = { workspace = true, features = ["rwh_05"] } # for "raw-gles" example +glutin-winit = { workspace = true, features = ["egl", "wgl", "wayland", "x11"] } +glutin = { workspace = true, features = ["egl", "wgl", "wayland", "x11"] } +rwh_05 = { version = "0.5", package = "raw-window-handle" } # temporary compatibility for glutin-winit +winit = { workspace = true, features = ["rwh_05"] } diff --git a/wgpu-hal/examples/halmark/main.rs b/wgpu-hal/examples/halmark/main.rs index 2261203682..778cad0e0e 100644 --- a/wgpu-hal/examples/halmark/main.rs +++ b/wgpu-hal/examples/halmark/main.rs @@ -68,7 +68,7 @@ struct Example { instance: A::Instance, 
adapter: A::Adapter, surface: A::Surface, - surface_format: wgt::TextureFormat, + surface_format: wgpu_types::TextureFormat, device: A::Device, queue: A::Queue, global_group: A::BindGroup, @@ -95,10 +95,9 @@ impl Example { fn init(window: &winit::window::Window) -> Result> { let instance_desc = hal::InstanceDescriptor { name: "example", - flags: wgt::InstanceFlags::from_build_config().with_env(), + flags: wgpu_types::InstanceFlags::from_build_config().with_env(), // Can't rely on having DXC available, so use FXC instead - dx12_shader_compiler: wgt::Dx12Compiler::Fxc, - gles_minor_version: wgt::Gles3MinorVersion::default(), + backend_options: wgpu_types::BackendOptions::default(), }; let instance = unsafe { A::Instance::init(&instance_desc)? }; let surface = { @@ -128,9 +127,9 @@ impl Example { let hal::OpenDevice { device, queue } = unsafe { adapter .open( - wgt::Features::empty(), - &wgt::Limits::default(), - &wgt::MemoryHints::default(), + wgpu_types::Features::empty(), + &wgpu_types::Limits::default(), + &wgpu_types::MemoryHints::default(), ) .unwrap() }; @@ -141,15 +140,15 @@ impl Example { *surface_caps.maximum_frame_latency.start(), *surface_caps.maximum_frame_latency.end(), ), - present_mode: wgt::PresentMode::Fifo, - composite_alpha_mode: wgt::CompositeAlphaMode::Opaque, - format: wgt::TextureFormat::Bgra8UnormSrgb, - extent: wgt::Extent3d { + present_mode: wgpu_types::PresentMode::Fifo, + composite_alpha_mode: wgpu_types::CompositeAlphaMode::Opaque, + format: wgpu_types::TextureFormat::Bgra8UnormSrgb, + extent: wgpu_types::Extent3d { width: window_size.0, height: window_size.1, depth_or_array_layers: 1, }, - usage: hal::TextureUses::COLOR_TARGET, + usage: wgpu_types::TextureUses::COLOR_TARGET, view_formats: vec![], }; unsafe { @@ -177,7 +176,7 @@ impl Example { }; let shader_desc = hal::ShaderModuleDescriptor { label: None, - runtime_checks: wgt::ShaderRuntimeChecks::checked(), + runtime_checks: wgpu_types::ShaderRuntimeChecks::checked(), }; let shader = unsafe { device @@ -189,30 +188,30 @@ impl Example { label: None, flags: hal::BindGroupLayoutFlags::empty(), entries: &[ - wgt::BindGroupLayoutEntry { + wgpu_types::BindGroupLayoutEntry { binding: 0, - visibility: wgt::ShaderStages::VERTEX, - ty: wgt::BindingType::Buffer { - ty: wgt::BufferBindingType::Uniform, + visibility: wgpu_types::ShaderStages::VERTEX, + ty: wgpu_types::BindingType::Buffer { + ty: wgpu_types::BufferBindingType::Uniform, has_dynamic_offset: false, - min_binding_size: wgt::BufferSize::new(size_of::() as _), + min_binding_size: wgpu_types::BufferSize::new(size_of::() as _), }, count: None, }, - wgt::BindGroupLayoutEntry { + wgpu_types::BindGroupLayoutEntry { binding: 1, - visibility: wgt::ShaderStages::FRAGMENT, - ty: wgt::BindingType::Texture { - sample_type: wgt::TextureSampleType::Float { filterable: true }, - view_dimension: wgt::TextureViewDimension::D2, + visibility: wgpu_types::ShaderStages::FRAGMENT, + ty: wgpu_types::BindingType::Texture { + sample_type: wgpu_types::TextureSampleType::Float { filterable: true }, + view_dimension: wgpu_types::TextureViewDimension::D2, multisampled: false, }, count: None, }, - wgt::BindGroupLayoutEntry { + wgpu_types::BindGroupLayoutEntry { binding: 2, - visibility: wgt::ShaderStages::FRAGMENT, - ty: wgt::BindingType::Sampler(wgt::SamplerBindingType::Filtering), + visibility: wgpu_types::ShaderStages::FRAGMENT, + ty: wgpu_types::BindingType::Sampler(wgpu_types::SamplerBindingType::Filtering), count: None, }, ], @@ -224,13 +223,13 @@ impl Example { let local_bgl_desc = 
hal::BindGroupLayoutDescriptor { label: None, flags: hal::BindGroupLayoutFlags::empty(), - entries: &[wgt::BindGroupLayoutEntry { + entries: &[wgpu_types::BindGroupLayoutEntry { binding: 0, - visibility: wgt::ShaderStages::VERTEX, - ty: wgt::BindingType::Buffer { - ty: wgt::BufferBindingType::Uniform, + visibility: wgpu_types::ShaderStages::VERTEX, + ty: wgpu_types::BindingType::Buffer { + ty: wgpu_types::BufferBindingType::Uniform, has_dynamic_offset: true, - min_binding_size: wgt::BufferSize::new(size_of::() as _), + min_binding_size: wgpu_types::BufferSize::new(size_of::() as _), }, count: None, }], @@ -267,16 +266,16 @@ impl Example { constants: &constants, zero_initialize_workgroup_memory: true, }), - primitive: wgt::PrimitiveState { - topology: wgt::PrimitiveTopology::TriangleStrip, - ..wgt::PrimitiveState::default() + primitive: wgpu_types::PrimitiveState { + topology: wgpu_types::PrimitiveTopology::TriangleStrip, + ..wgpu_types::PrimitiveState::default() }, depth_stencil: None, - multisample: wgt::MultisampleState::default(), - color_targets: &[Some(wgt::ColorTargetState { + multisample: wgpu_types::MultisampleState::default(), + color_targets: &[Some(wgpu_types::ColorTargetState { format: surface_config.format, - blend: Some(wgt::BlendState::ALPHA_BLENDING), - write_mask: wgt::ColorWrites::default(), + blend: Some(wgpu_types::BlendState::ALPHA_BLENDING), + write_mask: wgpu_types::ColorWrites::default(), })], multiview: None, cache: None, @@ -287,8 +286,8 @@ impl Example { let staging_buffer_desc = hal::BufferDescriptor { label: Some("stage"), - size: texture_data.len() as wgt::BufferAddress, - usage: hal::BufferUses::MAP_WRITE | hal::BufferUses::COPY_SRC, + size: texture_data.len() as wgpu_types::BufferAddress, + usage: wgpu_types::BufferUses::MAP_WRITE | wgpu_types::BufferUses::COPY_SRC, memory_flags: hal::MemoryFlags::TRANSIENT | hal::MemoryFlags::PREFER_COHERENT, }; let staging_buffer = unsafe { device.create_buffer(&staging_buffer_desc).unwrap() }; @@ -307,16 +306,16 @@ impl Example { let texture_desc = hal::TextureDescriptor { label: None, - size: wgt::Extent3d { + size: wgpu_types::Extent3d { width: 1, height: 1, depth_or_array_layers: 1, }, mip_level_count: 1, sample_count: 1, - dimension: wgt::TextureDimension::D2, - format: wgt::TextureFormat::Rgba8UnormSrgb, - usage: hal::TextureUses::COPY_DST | hal::TextureUses::RESOURCE, + dimension: wgpu_types::TextureDimension::D2, + format: wgpu_types::TextureFormat::Rgba8UnormSrgb, + usage: wgpu_types::TextureUses::COPY_DST | wgpu_types::TextureUses::RESOURCE, memory_flags: hal::MemoryFlags::empty(), view_formats: vec![], }; @@ -332,34 +331,34 @@ impl Example { let buffer_barrier = hal::BufferBarrier { buffer: &staging_buffer, usage: hal::StateTransition { - from: hal::BufferUses::empty(), - to: hal::BufferUses::COPY_SRC, + from: wgpu_types::BufferUses::empty(), + to: wgpu_types::BufferUses::COPY_SRC, }, }; let texture_barrier1 = hal::TextureBarrier { texture: &texture, - range: wgt::ImageSubresourceRange::default(), + range: wgpu_types::ImageSubresourceRange::default(), usage: hal::StateTransition { - from: hal::TextureUses::UNINITIALIZED, - to: hal::TextureUses::COPY_DST, + from: wgpu_types::TextureUses::UNINITIALIZED, + to: wgpu_types::TextureUses::COPY_DST, }, }; let texture_barrier2 = hal::TextureBarrier { texture: &texture, - range: wgt::ImageSubresourceRange::default(), + range: wgpu_types::ImageSubresourceRange::default(), usage: hal::StateTransition { - from: hal::TextureUses::COPY_DST, - to: hal::TextureUses::RESOURCE, + 
from: wgpu_types::TextureUses::COPY_DST, + to: wgpu_types::TextureUses::RESOURCE, }, }; let copy = hal::BufferTextureCopy { - buffer_layout: wgt::TexelCopyBufferLayout { + buffer_layout: wgpu_types::TexelCopyBufferLayout { offset: 0, bytes_per_row: Some(4), rows_per_image: None, }, texture_base: hal::TextureCopyBase { - origin: wgt::Origin3d::ZERO, + origin: wgpu_types::Origin3d::ZERO, mip_level: 0, array_layer: 0, aspect: hal::FormatAspects::COLOR, @@ -380,10 +379,10 @@ impl Example { let sampler_desc = hal::SamplerDescriptor { label: None, - address_modes: [wgt::AddressMode::ClampToEdge; 3], - mag_filter: wgt::FilterMode::Linear, - min_filter: wgt::FilterMode::Nearest, - mipmap_filter: wgt::FilterMode::Nearest, + address_modes: [wgpu_types::AddressMode::ClampToEdge; 3], + mag_filter: wgpu_types::FilterMode::Linear, + min_filter: wgpu_types::FilterMode::Nearest, + mipmap_filter: wgpu_types::FilterMode::Nearest, lod_clamp: 0.0..32.0, compare: None, anisotropy_clamp: 1, @@ -405,8 +404,8 @@ impl Example { let global_buffer_desc = hal::BufferDescriptor { label: Some("global"), - size: size_of::() as wgt::BufferAddress, - usage: hal::BufferUses::MAP_WRITE | hal::BufferUses::UNIFORM, + size: size_of::() as wgpu_types::BufferAddress, + usage: wgpu_types::BufferUses::MAP_WRITE | wgpu_types::BufferUses::UNIFORM, memory_flags: hal::MemoryFlags::PREFER_COHERENT, }; let global_buffer = unsafe { @@ -424,14 +423,15 @@ impl Example { buffer }; - let local_alignment = wgt::math::align_to( + let local_alignment = wgpu_types::math::align_to( size_of::() as u32, capabilities.limits.min_uniform_buffer_offset_alignment, ); let local_buffer_desc = hal::BufferDescriptor { label: Some("local"), - size: (MAX_BUNNIES as wgt::BufferAddress) * (local_alignment as wgt::BufferAddress), - usage: hal::BufferUses::MAP_WRITE | hal::BufferUses::UNIFORM, + size: (MAX_BUNNIES as wgpu_types::BufferAddress) + * (local_alignment as wgpu_types::BufferAddress), + usage: wgpu_types::BufferUses::MAP_WRITE | wgpu_types::BufferUses::UNIFORM, memory_flags: hal::MemoryFlags::PREFER_COHERENT, }; let local_buffer = unsafe { device.create_buffer(&local_buffer_desc).unwrap() }; @@ -439,9 +439,9 @@ impl Example { let view_desc = hal::TextureViewDescriptor { label: None, format: texture_desc.format, - dimension: wgt::TextureViewDimension::D2, - usage: hal::TextureUses::RESOURCE, - range: wgt::ImageSubresourceRange::default(), + dimension: wgpu_types::TextureViewDimension::D2, + usage: wgpu_types::TextureUses::RESOURCE, + range: wgpu_types::ImageSubresourceRange::default(), }; let texture_view = unsafe { device.create_texture_view(&texture, &view_desc).unwrap() }; @@ -453,7 +453,7 @@ impl Example { }; let texture_binding = hal::TextureBinding { view: &texture_view, - usage: hal::TextureUses::RESOURCE, + usage: wgpu_types::TextureUses::RESOURCE, }; let global_group_desc = hal::BindGroupDescriptor { label: Some("global"), @@ -487,7 +487,7 @@ impl Example { let local_buffer_binding = hal::BufferBinding { buffer: &local_buffer, offset: 0, - size: wgt::BufferSize::new(size_of::() as _), + size: wgpu_types::BufferSize::new(size_of::() as _), }; let local_group_desc = hal::BindGroupDescriptor { label: Some("local"), @@ -649,7 +649,7 @@ impl Example { unsafe { let mapping = self .device - .map_buffer(&self.local_buffer, 0..size as wgt::BufferAddress) + .map_buffer(&self.local_buffer, 0..size as wgpu_types::BufferAddress) .unwrap(); ptr::copy_nonoverlapping( self.bunnies.as_ptr() as *const u8, @@ -673,10 +673,10 @@ impl Example { let target_barrier0 = 
hal::TextureBarrier { texture: surface_tex.borrow(), - range: wgt::ImageSubresourceRange::default(), + range: wgpu_types::ImageSubresourceRange::default(), usage: hal::StateTransition { - from: hal::TextureUses::UNINITIALIZED, - to: hal::TextureUses::COLOR_TARGET, + from: wgpu_types::TextureUses::UNINITIALIZED, + to: wgpu_types::TextureUses::COLOR_TARGET, }, }; unsafe { @@ -687,9 +687,9 @@ impl Example { let surface_view_desc = hal::TextureViewDescriptor { label: None, format: self.surface_format, - dimension: wgt::TextureViewDimension::D2, - usage: hal::TextureUses::COLOR_TARGET, - range: wgt::ImageSubresourceRange::default(), + dimension: wgpu_types::TextureViewDimension::D2, + usage: wgpu_types::TextureUses::COLOR_TARGET, + range: wgpu_types::ImageSubresourceRange::default(), }; let surface_tex_view = unsafe { self.device @@ -698,7 +698,7 @@ impl Example { }; let pass_desc = hal::RenderPassDescriptor { label: None, - extent: wgt::Extent3d { + extent: wgpu_types::Extent3d { width: self.extent[0], height: self.extent[1], depth_or_array_layers: 1, @@ -707,11 +707,11 @@ impl Example { color_attachments: &[Some(hal::ColorAttachment { target: hal::Attachment { view: &surface_tex_view, - usage: hal::TextureUses::COLOR_TARGET, + usage: wgpu_types::TextureUses::COLOR_TARGET, }, resolve_target: None, ops: hal::AttachmentOps::STORE, - clear_value: wgt::Color { + clear_value: wgpu_types::Color { r: 0.1, g: 0.2, b: 0.3, @@ -731,7 +731,8 @@ impl Example { } for i in 0..self.bunnies.len() { - let offset = (i as wgt::DynamicOffset) * (self.local_alignment as wgt::DynamicOffset); + let offset = (i as wgpu_types::DynamicOffset) + * (self.local_alignment as wgpu_types::DynamicOffset); unsafe { ctx.encoder .set_bind_group(&self.pipeline_layout, 1, &self.local_group, &[offset]); @@ -743,10 +744,10 @@ impl Example { let target_barrier1 = hal::TextureBarrier { texture: surface_tex.borrow(), - range: wgt::ImageSubresourceRange::default(), + range: wgpu_types::ImageSubresourceRange::default(), usage: hal::StateTransition { - from: hal::TextureUses::COLOR_TARGET, - to: hal::TextureUses::PRESENT, + from: wgpu_types::TextureUses::COLOR_TARGET, + to: wgpu_types::TextureUses::PRESENT, }, }; unsafe { diff --git a/wgpu-hal/examples/raw-gles.rs b/wgpu-hal/examples/raw-gles.rs index bd086c2dce..5215a4282b 100644 --- a/wgpu-hal/examples/raw-gles.rs +++ b/wgpu-hal/examples/raw-gles.rs @@ -138,12 +138,15 @@ fn main() { println!("Hooking up to wgpu-hal"); exposed.get_or_insert_with(|| { unsafe { - ::Adapter::new_external(|name| { - // XXX: On WGL this should only be called after the context was made current - gl_config - .display() - .get_proc_address(&CString::new(name).expect(name)) - }) + ::Adapter::new_external( + |name| { + // XXX: On WGL this should only be called after the context was made current + gl_config + .display() + .get_proc_address(&CString::new(name).expect(name)) + }, + wgpu_types::GlBackendOptions::default(), + ) } .expect("GL adapter can't be initialized") }); @@ -273,14 +276,14 @@ fn fill_screen(exposed: &hal::ExposedAdapter, width: u32, height let od = unsafe { exposed.adapter.open( - wgt::Features::empty(), - &wgt::Limits::downlevel_defaults(), - &wgt::MemoryHints::default(), + wgpu_types::Features::empty(), + &wgpu_types::Limits::downlevel_defaults(), + &wgpu_types::MemoryHints::default(), ) } .unwrap(); - let format = wgt::TextureFormat::Rgba8UnormSrgb; + let format = wgpu_types::TextureFormat::Rgba8UnormSrgb; let texture = ::Texture::default_framebuffer(format); let view = unsafe { od.device @@ 
-289,9 +292,9 @@ fn fill_screen(exposed: &hal::ExposedAdapter, width: u32, height &hal::TextureViewDescriptor { label: None, format, - dimension: wgt::TextureViewDimension::D2, - usage: hal::TextureUses::COLOR_TARGET, - range: wgt::ImageSubresourceRange::default(), + dimension: wgpu_types::TextureViewDimension::D2, + usage: wgpu_types::TextureUses::COLOR_TARGET, + range: wgpu_types::ImageSubresourceRange::default(), }, ) .unwrap() @@ -309,7 +312,7 @@ fn fill_screen(exposed: &hal::ExposedAdapter, width: u32, height let mut fence = unsafe { od.device.create_fence().unwrap() }; let rp_desc = hal::RenderPassDescriptor { label: None, - extent: wgt::Extent3d { + extent: wgpu_types::Extent3d { width, height, depth_or_array_layers: 1, @@ -318,11 +321,11 @@ fn fill_screen(exposed: &hal::ExposedAdapter, width: u32, height color_attachments: &[Some(hal::ColorAttachment { target: hal::Attachment { view: &view, - usage: hal::TextureUses::COLOR_TARGET, + usage: wgpu_types::TextureUses::COLOR_TARGET, }, resolve_target: None, ops: hal::AttachmentOps::STORE, - clear_value: wgt::Color::BLUE, + clear_value: wgpu_types::Color::BLUE, })], depth_stencil_attachment: None, multiview: None, diff --git a/wgpu-hal/examples/ray-traced-triangle/main.rs b/wgpu-hal/examples/ray-traced-triangle/main.rs index 61eccebdcb..6db9bba716 100644 --- a/wgpu-hal/examples/ray-traced-triangle/main.rs +++ b/wgpu-hal/examples/ray-traced-triangle/main.rs @@ -13,6 +13,7 @@ use std::{ ptr, time::Instant, }; +use wgpu_types::Dx12BackendOptions; use winit::window::WindowButtons; const DESIRED_MAX_LATENCY: u32 = 2; @@ -32,7 +33,7 @@ impl std::fmt::Debug for AccelerationStructureInstance { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("Instance") .field("transform", &self.transform) - .field("custom_index()", &self.custom_index()) + .field("custom_data()", &self.custom_index()) .field("mask()", &self.mask()) .field( "shader_binding_table_record_offset()", @@ -196,7 +197,7 @@ struct Example { instance: A::Instance, adapter: A::Adapter, surface: A::Surface, - surface_format: wgt::TextureFormat, + surface_format: wgpu_types::TextureFormat, device: A::Device, queue: A::Queue, @@ -238,12 +239,13 @@ impl Example { let instance_desc = hal::InstanceDescriptor { name: "example", - flags: wgt::InstanceFlags::default(), - dx12_shader_compiler: wgt::Dx12Compiler::DynamicDxc { - dxc_path: "dxcompiler.dll".to_string(), - dxil_path: "dxil.dll".to_string(), + flags: wgpu_types::InstanceFlags::default(), + backend_options: wgpu_types::BackendOptions { + dx12: Dx12BackendOptions { + shader_compiler: wgpu_types::Dx12Compiler::default_dynamic_dxc(), + }, + ..Default::default() }, - gles_minor_version: wgt::Gles3MinorVersion::default(), }; let instance = unsafe { A::Instance::init(&instance_desc)? 
}; let surface = { @@ -274,8 +276,8 @@ impl Example { adapter .open( features, - &wgt::Limits::default(), - &wgt::MemoryHints::Performance, + &wgpu_types::Limits::default(), + &wgpu_types::MemoryHints::Performance, ) .unwrap() }; @@ -284,9 +286,9 @@ impl Example { dbg!(&surface_caps.formats); let surface_format = if surface_caps .formats - .contains(&wgt::TextureFormat::Rgba8Unorm) + .contains(&wgpu_types::TextureFormat::Rgba8Unorm) { - wgt::TextureFormat::Rgba8Unorm + wgpu_types::TextureFormat::Rgba8Unorm } else { *surface_caps.formats.first().unwrap() }; @@ -294,15 +296,15 @@ impl Example { maximum_frame_latency: DESIRED_MAX_LATENCY .max(*surface_caps.maximum_frame_latency.start()) .min(*surface_caps.maximum_frame_latency.end()), - present_mode: wgt::PresentMode::Fifo, - composite_alpha_mode: wgt::CompositeAlphaMode::Opaque, + present_mode: wgpu_types::PresentMode::Fifo, + composite_alpha_mode: wgpu_types::CompositeAlphaMode::Opaque, format: surface_format, - extent: wgt::Extent3d { + extent: wgpu_types::Extent3d { width: window_size.0, height: window_size.1, depth_or_array_layers: 1, }, - usage: hal::TextureUses::COLOR_TARGET | hal::TextureUses::COPY_DST, + usage: wgpu_types::TextureUses::COLOR_TARGET | wgpu_types::TextureUses::COPY_DST, view_formats: vec![surface_format], }; unsafe { @@ -319,30 +321,30 @@ impl Example { label: None, flags: hal::BindGroupLayoutFlags::empty(), entries: &[ - wgt::BindGroupLayoutEntry { + wgpu_types::BindGroupLayoutEntry { binding: 0, - visibility: wgt::ShaderStages::COMPUTE, - ty: wgt::BindingType::Buffer { - ty: wgt::BufferBindingType::Uniform, + visibility: wgpu_types::ShaderStages::COMPUTE, + ty: wgpu_types::BindingType::Buffer { + ty: wgpu_types::BufferBindingType::Uniform, has_dynamic_offset: false, - min_binding_size: wgt::BufferSize::new(size_of::() as _), + min_binding_size: wgpu_types::BufferSize::new(size_of::() as _), }, count: None, }, - wgt::BindGroupLayoutEntry { + wgpu_types::BindGroupLayoutEntry { binding: 1, - visibility: wgt::ShaderStages::COMPUTE, - ty: wgt::BindingType::StorageTexture { - access: wgt::StorageTextureAccess::WriteOnly, - format: wgt::TextureFormat::Rgba8Unorm, - view_dimension: wgt::TextureViewDimension::D2, + visibility: wgpu_types::ShaderStages::COMPUTE, + ty: wgpu_types::BindingType::StorageTexture { + access: wgpu_types::StorageTextureAccess::WriteOnly, + format: wgpu_types::TextureFormat::Rgba8Unorm, + view_dimension: wgpu_types::TextureViewDimension::D2, }, count: None, }, - wgt::BindGroupLayoutEntry { + wgpu_types::BindGroupLayoutEntry { binding: 2, - visibility: wgt::ShaderStages::COMPUTE, - ty: wgt::BindingType::AccelerationStructure { + visibility: wgpu_types::ShaderStages::COMPUTE, + ty: wgpu_types::BindingType::AccelerationStructure { vertex_return: false, }, count: None, @@ -373,7 +375,7 @@ impl Example { }; let shader_desc = hal::ShaderModuleDescriptor { label: None, - runtime_checks: wgt::ShaderRuntimeChecks::checked(), + runtime_checks: wgpu_types::ShaderRuntimeChecks::checked(), }; let shader_module = unsafe { device @@ -421,8 +423,8 @@ impl Example { .create_buffer(&hal::BufferDescriptor { label: Some("vertices buffer"), size: vertices_size_in_bytes as u64, - usage: hal::BufferUses::MAP_WRITE - | hal::BufferUses::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT, + usage: wgpu_types::BufferUses::MAP_WRITE + | wgpu_types::BufferUses::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT, memory_flags: hal::MemoryFlags::TRANSIENT | hal::MemoryFlags::PREFER_COHERENT, }) .unwrap(); @@ -447,8 +449,8 @@ impl Example { 
.create_buffer(&hal::BufferDescriptor { label: Some("indices buffer"), size: indices_size_in_bytes as u64, - usage: hal::BufferUses::MAP_WRITE - | hal::BufferUses::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT, + usage: wgpu_types::BufferUses::MAP_WRITE + | wgpu_types::BufferUses::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT, memory_flags: hal::MemoryFlags::TRANSIENT | hal::MemoryFlags::PREFER_COHERENT, }) @@ -474,14 +476,14 @@ impl Example { let blas_triangles = vec![hal::AccelerationStructureTriangles { vertex_buffer: Some(&vertices_buffer), first_vertex: 0, - vertex_format: wgt::VertexFormat::Float32x3, + vertex_format: wgpu_types::VertexFormat::Float32x3, // each vertex is 3 floats, and floats are stored raw in the array vertex_count: vertices.len() as u32 / 3, vertex_stride: 3 * 4, indices: indices_buffer.as_ref().map(|(buf, len)| { hal::AccelerationStructureTriangleIndices { buffer: Some(buf), - format: wgt::IndexFormat::Uint32, + format: wgpu_types::IndexFormat::Uint32, offset: 0, count: *len as u32, } @@ -555,7 +557,7 @@ impl Example { .create_buffer(&hal::BufferDescriptor { label: Some("uniform buffer"), size: uniforms_size as u64, - usage: hal::BufferUses::MAP_WRITE | hal::BufferUses::UNIFORM, + usage: wgpu_types::BufferUses::MAP_WRITE | wgpu_types::BufferUses::UNIFORM, memory_flags: hal::MemoryFlags::PREFER_COHERENT, }) .unwrap(); @@ -575,27 +577,27 @@ impl Example { let texture_desc = hal::TextureDescriptor { label: None, - size: wgt::Extent3d { + size: wgpu_types::Extent3d { width: 512, height: 512, depth_or_array_layers: 1, }, mip_level_count: 1, sample_count: 1, - dimension: wgt::TextureDimension::D2, - format: wgt::TextureFormat::Rgba8Unorm, - usage: hal::TextureUses::STORAGE_READ_WRITE | hal::TextureUses::COPY_SRC, + dimension: wgpu_types::TextureDimension::D2, + format: wgpu_types::TextureFormat::Rgba8Unorm, + usage: wgpu_types::TextureUses::STORAGE_READ_WRITE | wgpu_types::TextureUses::COPY_SRC, memory_flags: hal::MemoryFlags::empty(), - view_formats: vec![wgt::TextureFormat::Rgba8Unorm], + view_formats: vec![wgpu_types::TextureFormat::Rgba8Unorm], }; let texture = unsafe { device.create_texture(&texture_desc).unwrap() }; let view_desc = hal::TextureViewDescriptor { label: None, format: texture_desc.format, - dimension: wgt::TextureViewDimension::D2, - usage: hal::TextureUses::STORAGE_READ_WRITE | hal::TextureUses::COPY_SRC, - range: wgt::ImageSubresourceRange::default(), + dimension: wgpu_types::TextureViewDimension::D2, + usage: wgpu_types::TextureUses::STORAGE_READ_WRITE | wgpu_types::TextureUses::COPY_SRC, + range: wgpu_types::ImageSubresourceRange::default(), }; let texture_view = unsafe { device.create_texture_view(&texture, &view_desc).unwrap() }; @@ -607,7 +609,7 @@ impl Example { }; let texture_binding = hal::TextureBinding { view: &texture_view, - usage: hal::TextureUses::STORAGE_READ_WRITE, + usage: wgpu_types::TextureUses::STORAGE_READ_WRITE, }; let group_desc = hal::BindGroupDescriptor { label: Some("bind group"), @@ -644,7 +646,7 @@ impl Example { size: blas_sizes .build_scratch_size .max(tlas_sizes.build_scratch_size), - usage: hal::BufferUses::ACCELERATION_STRUCTURE_SCRATCH, + usage: wgpu_types::BufferUses::ACCELERATION_STRUCTURE_SCRATCH, memory_flags: hal::MemoryFlags::empty(), }) .unwrap() @@ -696,8 +698,8 @@ impl Example { .create_buffer(&hal::BufferDescriptor { label: Some("instances_buffer"), size: instances_buffer_size as u64, - usage: hal::BufferUses::MAP_WRITE - | hal::BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT, + usage: 
wgpu_types::BufferUses::MAP_WRITE + | wgpu_types::BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT, memory_flags: hal::MemoryFlags::TRANSIENT | hal::MemoryFlags::PREFER_COHERENT, }) .unwrap(); @@ -756,8 +758,8 @@ impl Example { let scratch_buffer_barrier = hal::BufferBarrier { buffer: &scratch_buffer, usage: hal::StateTransition { - from: hal::BufferUses::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT, - to: hal::BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT, + from: wgpu_types::BufferUses::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT, + to: wgpu_types::BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT, }, }; cmd_encoder.transition_buffers(iter::once(scratch_buffer_barrier)); @@ -791,10 +793,10 @@ impl Example { let texture_barrier = hal::TextureBarrier { texture: &texture, - range: wgt::ImageSubresourceRange::default(), + range: wgpu_types::ImageSubresourceRange::default(), usage: hal::StateTransition { - from: hal::TextureUses::UNINITIALIZED, - to: hal::TextureUses::STORAGE_READ_WRITE, + from: wgpu_types::TextureUses::UNINITIALIZED, + to: wgpu_types::TextureUses::STORAGE_READ_WRITE, }, }; @@ -865,10 +867,10 @@ impl Example { let target_barrier0 = hal::TextureBarrier { texture: surface_tex.borrow(), - range: wgt::ImageSubresourceRange::default(), + range: wgpu_types::ImageSubresourceRange::default(), usage: hal::StateTransition { - from: hal::TextureUses::UNINITIALIZED, - to: hal::TextureUses::COPY_DST, + from: wgpu_types::TextureUses::UNINITIALIZED, + to: wgpu_types::TextureUses::COPY_DST, }, }; @@ -937,8 +939,8 @@ impl Example { let scratch_buffer_barrier = hal::BufferBarrier { buffer: &self.scratch_buffer, usage: hal::StateTransition { - from: hal::BufferUses::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT, - to: hal::BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT, + from: wgpu_types::BufferUses::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT, + to: wgpu_types::BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT, }, }; ctx.encoder @@ -950,9 +952,9 @@ impl Example { let surface_view_desc = hal::TextureViewDescriptor { label: None, format: self.surface_format, - dimension: wgt::TextureViewDimension::D2, - usage: hal::TextureUses::COPY_DST, - range: wgt::ImageSubresourceRange::default(), + dimension: wgpu_types::TextureViewDimension::D2, + usage: wgpu_types::TextureUses::COPY_DST, + range: wgpu_types::ImageSubresourceRange::default(), }; let surface_tex_view = unsafe { self.device @@ -974,26 +976,26 @@ impl Example { let target_barrier1 = hal::TextureBarrier { texture: surface_tex.borrow(), - range: wgt::ImageSubresourceRange::default(), + range: wgpu_types::ImageSubresourceRange::default(), usage: hal::StateTransition { - from: hal::TextureUses::COPY_DST, - to: hal::TextureUses::PRESENT, + from: wgpu_types::TextureUses::COPY_DST, + to: wgpu_types::TextureUses::PRESENT, }, }; let target_barrier2 = hal::TextureBarrier { texture: &self.texture, - range: wgt::ImageSubresourceRange::default(), + range: wgpu_types::ImageSubresourceRange::default(), usage: hal::StateTransition { - from: hal::TextureUses::STORAGE_READ_WRITE, - to: hal::TextureUses::COPY_SRC, + from: wgpu_types::TextureUses::STORAGE_READ_WRITE, + to: wgpu_types::TextureUses::COPY_SRC, }, }; let target_barrier3 = hal::TextureBarrier { texture: &self.texture, - range: wgt::ImageSubresourceRange::default(), + range: wgpu_types::ImageSubresourceRange::default(), usage: hal::StateTransition { - from: hal::TextureUses::COPY_SRC, - to: hal::TextureUses::STORAGE_READ_WRITE, + from: wgpu_types::TextureUses::COPY_SRC, + to: 
wgpu_types::TextureUses::STORAGE_READ_WRITE, }, }; unsafe { @@ -1001,19 +1003,19 @@ impl Example { ctx.encoder.transition_textures(iter::once(target_barrier2)); ctx.encoder.copy_texture_to_texture( &self.texture, - hal::TextureUses::COPY_SRC, + wgpu_types::TextureUses::COPY_SRC, surface_tex.borrow(), std::iter::once(hal::TextureCopy { src_base: hal::TextureCopyBase { mip_level: 0, array_layer: 0, - origin: wgt::Origin3d::ZERO, + origin: wgpu_types::Origin3d::ZERO, aspect: hal::FormatAspects::COLOR, }, dst_base: hal::TextureCopyBase { mip_level: 0, array_layer: 0, - origin: wgt::Origin3d::ZERO, + origin: wgpu_types::Origin3d::ZERO, aspect: hal::FormatAspects::COLOR, }, size: hal::CopyExtent { diff --git a/wgpu-hal/src/auxil/dxgi/conv.rs b/wgpu-hal/src/auxil/dxgi/conv.rs index a88853de11..2cf75cd4e2 100644 --- a/wgpu-hal/src/auxil/dxgi/conv.rs +++ b/wgpu-hal/src/auxil/dxgi/conv.rs @@ -181,7 +181,7 @@ pub fn map_texture_format_for_copy( pub fn map_texture_format_for_resource( format: wgt::TextureFormat, - usage: crate::TextureUses, + usage: wgt::TextureUses, has_view_formats: bool, casting_fully_typed_format_supported: bool, ) -> Dxgi::Common::DXGI_FORMAT { @@ -206,10 +206,10 @@ pub fn map_texture_format_for_resource( // We might view this resource as SRV/UAV but also as DSV } else if format.is_depth_stencil_format() && usage.intersects( - crate::TextureUses::RESOURCE - | crate::TextureUses::STORAGE_READ_ONLY - | crate::TextureUses::STORAGE_WRITE_ONLY - | crate::TextureUses::STORAGE_READ_WRITE, + wgt::TextureUses::RESOURCE + | wgt::TextureUses::STORAGE_READ_ONLY + | wgt::TextureUses::STORAGE_WRITE_ONLY + | wgt::TextureUses::STORAGE_READ_WRITE, ) { match format { diff --git a/wgpu-hal/src/dx12/adapter.rs b/wgpu-hal/src/dx12/adapter.rs index e71e2fce3a..8211fd9f94 100644 --- a/wgpu-hal/src/dx12/adapter.rs +++ b/wgpu-hal/src/dx12/adapter.rs @@ -154,6 +154,11 @@ impl super::Adapter { } .unwrap(); + if options.ResourceBindingTier.0 < Direct3D12::D3D12_RESOURCE_BINDING_TIER_2.0 { + // We require Tier 2 or higher for the ability to make samplers bindless in all cases. + return None; + } + let _depth_bounds_test_supported = { let mut features2 = Direct3D12::D3D12_FEATURE_DATA_D3D12_OPTIONS2::default(); unsafe { @@ -195,6 +200,32 @@ impl super::Adapter { .is_ok() }; + let mut max_sampler_descriptor_heap_size = + Direct3D12::D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE; + { + let mut features19 = Direct3D12::D3D12_FEATURE_DATA_D3D12_OPTIONS19::default(); + let res = unsafe { + device.CheckFeatureSupport( + Direct3D12::D3D12_FEATURE_D3D12_OPTIONS19, + <*mut _>::cast(&mut features19), + size_of_val(&features19) as u32, + ) + }; + + // Sometimes on Windows 11 23H2, the function returns success, even though the runtime + // does not know about `Options19`. This can cause this number to be 0 as the structure isn't written to. + // This value is nonsense and creating zero-sized sampler heaps can cause drivers to explode. + // As we're guaranteed 2048 anyway, we make sure this value is not under 2048.
+ // + // https://github.com/gfx-rs/wgpu/issues/7053 + let is_ok = res.is_ok(); + let is_above_minimum = features19.MaxSamplerDescriptorHeapSize + > Direct3D12::D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE; + if is_ok && is_above_minimum { + max_sampler_descriptor_heap_size = features19.MaxSamplerDescriptorHeapSize; + } + }; + let shader_model = if dxc_container.is_none() { naga::back::hlsl::ShaderModel::V5_1 } else { @@ -260,6 +291,7 @@ impl super::Adapter { // See https://github.com/gfx-rs/wgpu/issues/3552 suballocation_supported: !info.name.contains("Iris(R) Xe"), shader_model, + max_sampler_descriptor_heap_size, }; // Theoretically vram limited, but in practice 2^20 is the limit @@ -339,6 +371,7 @@ impl super::Adapter { features.set( wgt::Features::TEXTURE_BINDING_ARRAY + | wgt::Features::STORAGE_RESOURCE_BINDING_ARRAY | wgt::Features::UNIFORM_BUFFER_AND_STORAGE_TEXTURE_ARRAY_NON_UNIFORM_INDEXING | wgt::Features::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING, shader_model >= naga::back::hlsl::ShaderModel::V5_1, @@ -514,8 +547,10 @@ impl super::Adapter { // for the descriptor table // - If a bind group has samplers it will consume a `DWORD` // for the descriptor table - // - Each dynamic buffer will consume `2 DWORDs` for the + // - Each dynamic uniform buffer will consume `2 DWORDs` for the // root descriptor + // - Each dynamic storage buffer will consume `1 DWORD` for a + // root constant representing the dynamic offset // - The special constants buffer count as constants // // Since we can't know beforehand all root signatures that @@ -824,9 +859,9 @@ impl crate::Adapter for super::Adapter { // See https://learn.microsoft.com/en-us/windows/win32/api/dxgi/nf-dxgi-idxgidevice1-setmaximumframelatency maximum_frame_latency: 1..=16, current_extent, - usage: crate::TextureUses::COLOR_TARGET - | crate::TextureUses::COPY_SRC - | crate::TextureUses::COPY_DST, + usage: wgt::TextureUses::COLOR_TARGET + | wgt::TextureUses::COPY_SRC + | wgt::TextureUses::COPY_DST, present_modes, composite_alpha_modes: vec![wgt::CompositeAlphaMode::Opaque], }) diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs index 99cee37373..914eaa62a3 100644 --- a/wgpu-hal/src/dx12/command.rs +++ b/wgpu-hal/src/dx12/command.rs @@ -85,7 +85,7 @@ impl super::CommandEncoder { unsafe { list.SetDescriptorHeaps(&[ Some(self.shared.heap_views.raw.clone()), - Some(self.shared.heap_samplers.raw.clone()), + Some(self.shared.sampler_heap.heap().clone()), ]) }; } @@ -171,7 +171,7 @@ impl super::CommandEncoder { // Note: we have to call this lazily before draw calls. Otherwise, D3D complains // about the root parameters being incompatible with root signature. 
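The note above describes the lazy flush of root parameters before a draw or dispatch. The following is a minimal, self-contained sketch of that dirty-bitmask bookkeeping; the types and the `flush` helper are illustrative stand-ins, not the actual `wgpu-hal` encoder:

```rust
/// Sketch of the `dirty_root_elements` pattern: each root parameter gets one bit,
/// binding marks bits dirty, and the lazy flush rewrites only the dirty parameters.
#[derive(Clone, Copy, Debug)]
enum RootElement {
    Empty,
    Table(u64),
    DynamicUniformBuffer { address: u64 },
}

struct PassState {
    root_elements: [RootElement; 64],
    dirty_root_elements: u64, // bit i set => root parameter i must be re-set on the list
}

impl PassState {
    fn new(total_root_elements: u32) -> Self {
        Self {
            root_elements: [RootElement::Empty; 64],
            // A new root signature invalidates everything: mark all parameters dirty.
            dirty_root_elements: (1u64 << total_root_elements) - 1,
        }
    }

    fn set(&mut self, index: u32, element: RootElement) {
        self.root_elements[index as usize] = element;
        self.dirty_root_elements |= 1 << index;
    }

    /// Called lazily right before a draw/dispatch; only dirty parameters are rewritten.
    fn flush(&mut self, mut set_on_command_list: impl FnMut(u32, RootElement)) {
        while self.dirty_root_elements != 0 {
            let index = self.dirty_root_elements.trailing_zeros();
            self.dirty_root_elements ^= 1 << index;
            set_on_command_list(index, self.root_elements[index as usize]);
        }
    }
}

fn main() {
    let mut pass = PassState::new(3);
    pass.set(1, RootElement::Table(0xdead_beef));
    pass.set(2, RootElement::DynamicUniformBuffer { address: 0x1000 });
    pass.flush(|i, e| println!("SetRoot*({i}, {e:?})"));
    assert_eq!(pass.dirty_root_elements, 0);
}
```

Tracking dirtiness per root parameter means a root-signature change can simply mark every bit dirty, while redundant re-binds between consecutive draws are skipped.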
fn update_root_elements(&mut self) { - use super::{BufferViewKind as Bvk, PassKind as Pk}; + use super::PassKind as Pk; while self.pass.dirty_root_elements != 0 { let list = self.list.as_ref().unwrap(); @@ -217,30 +217,48 @@ impl super::CommandEncoder { Pk::Compute => unsafe { list.SetComputeRootDescriptorTable(index, descriptor) }, Pk::Transfer => (), }, - super::RootElement::DynamicOffsetBuffer { kind, address } => { + super::RootElement::DynamicUniformBuffer { address } => { let address = address.ptr; - match (self.pass.kind, kind) { - (Pk::Render, Bvk::Constant) => unsafe { + match self.pass.kind { + Pk::Render => unsafe { list.SetGraphicsRootConstantBufferView(index, address) }, - (Pk::Compute, Bvk::Constant) => unsafe { + Pk::Compute => unsafe { list.SetComputeRootConstantBufferView(index, address) }, - (Pk::Render, Bvk::ShaderResource) => unsafe { - list.SetGraphicsRootShaderResourceView(index, address) - }, - (Pk::Compute, Bvk::ShaderResource) => unsafe { - list.SetComputeRootShaderResourceView(index, address) - }, - (Pk::Render, Bvk::UnorderedAccess) => unsafe { - list.SetGraphicsRootUnorderedAccessView(index, address) - }, - (Pk::Compute, Bvk::UnorderedAccess) => unsafe { - list.SetComputeRootUnorderedAccessView(index, address) - }, - (Pk::Transfer, _) => (), + Pk::Transfer => (), } } + super::RootElement::DynamicOffsetsBuffer { start, end } => { + let values = &self.pass.dynamic_storage_buffer_offsets[start..end]; + + for (offset, &value) in values.iter().enumerate() { + match self.pass.kind { + Pk::Render => unsafe { + list.SetGraphicsRoot32BitConstant(index, value, offset as u32) + }, + Pk::Compute => unsafe { + list.SetComputeRoot32BitConstant(index, value, offset as u32) + }, + Pk::Transfer => (), + } + } + } + super::RootElement::SamplerHeap => match self.pass.kind { + Pk::Render => unsafe { + list.SetGraphicsRootDescriptorTable( + index, + self.shared.sampler_heap.gpu_descriptor_table(), + ) + }, + Pk::Compute => unsafe { + list.SetComputeRootDescriptorTable( + index, + self.shared.sampler_heap.gpu_descriptor_table(), + ) + }, + Pk::Transfer => (), + }, } } } @@ -254,6 +272,9 @@ impl super::CommandEncoder { other: 0, }; } + if let Some(root_index) = layout.sampler_heap_root_index { + self.pass.root_elements[root_index as usize] = super::RootElement::SamplerHeap; + } self.pass.layout = layout.clone(); self.pass.dirty_root_elements = (1 << layout.total_root_elements) - 1; } @@ -362,7 +383,7 @@ impl crate::CommandEncoder for super::CommandEncoder { }, }; self.temp.barriers.push(raw); - } else if barrier.usage.from == crate::BufferUses::STORAGE_READ_WRITE { + } else if barrier.usage.from == wgt::BufferUses::STORAGE_READ_WRITE { let raw = Direct3D12::D3D12_RESOURCE_BARRIER { Type: Direct3D12::D3D12_RESOURCE_BARRIER_TYPE_UAV, Flags: Direct3D12::D3D12_RESOURCE_BARRIER_FLAG_NONE, @@ -461,7 +482,7 @@ impl crate::CommandEncoder for super::CommandEncoder { } } } - } else if barrier.usage.from == crate::TextureUses::STORAGE_READ_WRITE { + } else if barrier.usage.from == wgt::TextureUses::STORAGE_READ_WRITE { let raw = Direct3D12::D3D12_RESOURCE_BARRIER { Type: Direct3D12::D3D12_RESOURCE_BARRIER_TYPE_UAV, Flags: Direct3D12::D3D12_RESOURCE_BARRIER_FLAG_NONE, @@ -524,7 +545,7 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe fn copy_texture_to_texture( &mut self, src: &super::Texture, - _src_usage: crate::TextureUses, + _src_usage: wgt::TextureUses, dst: &super::Texture, regions: T, ) where @@ -605,7 +626,7 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe 
fn copy_texture_to_buffer( &mut self, src: &super::Texture, - _src_usage: crate::TextureUses, + _src_usage: wgt::TextureUses, dst: &super::Buffer, regions: T, ) where @@ -714,7 +735,7 @@ impl crate::CommandEncoder for super::CommandEncoder { } let ds_view = desc.depth_stencil_attachment.as_ref().map(|ds| { - if ds.target.usage == crate::TextureUses::DEPTH_STENCIL_WRITE { + if ds.target.usage == wgt::TextureUses::DEPTH_STENCIL_WRITE { ds.target.view.handle_dsv_rw.as_ref().unwrap().raw } else { ds.target.view.handle_dsv_ro.as_ref().unwrap().raw @@ -907,27 +928,51 @@ impl crate::CommandEncoder for super::CommandEncoder { root_index += 1; } - // Bind Sampler descriptor tables. - if info.tables.contains(super::TableTypes::SAMPLERS) { - self.pass.root_elements[root_index] = - super::RootElement::Table(group.handle_samplers.unwrap().gpu); - root_index += 1; - } + let mut offsets_index = 0; + if let Some(dynamic_storage_buffer_offsets) = info.dynamic_storage_buffer_offsets.as_ref() { + let root_index = dynamic_storage_buffer_offsets.root_index; + let range = &dynamic_storage_buffer_offsets.range; - // Bind root descriptors - for ((&kind, &gpu_base), &offset) in info - .dynamic_buffers - .iter() - .zip(group.dynamic_buffers.iter()) - .zip(dynamic_offsets) - { - self.pass.root_elements[root_index] = super::RootElement::DynamicOffsetBuffer { - kind, - address: Direct3D12::D3D12_GPU_DESCRIPTOR_HANDLE { - ptr: gpu_base.ptr + offset as u64, - }, + if range.end > self.pass.dynamic_storage_buffer_offsets.len() { + self.pass + .dynamic_storage_buffer_offsets + .resize(range.end, 0); + } + + offsets_index += range.start; + + self.pass.root_elements[root_index as usize] = + super::RootElement::DynamicOffsetsBuffer { + start: range.start, + end: range.end, + }; + + if self.pass.layout.signature == layout.shared.signature { + self.pass.dirty_root_elements |= 1 << root_index; + } else { + // D3D12 requires full reset on signature change + // but we don't reset it here since it will be reset below }; - root_index += 1; + } + + // Bind root descriptors for dynamic uniform buffers + // or set root constants for offsets of dynamic storage buffers + for (&dynamic_buffer, &offset) in group.dynamic_buffers.iter().zip(dynamic_offsets) { + match dynamic_buffer { + super::DynamicBuffer::Uniform(gpu_base) => { + self.pass.root_elements[root_index] = + super::RootElement::DynamicUniformBuffer { + address: Direct3D12::D3D12_GPU_DESCRIPTOR_HANDLE { + ptr: gpu_base.ptr + offset as u64, + }, + }; + root_index += 1; + } + super::DynamicBuffer::Storage => { + self.pass.dynamic_storage_buffer_offsets[offsets_index] = offset; + offsets_index += 1; + } + } } if self.pass.layout.signature == layout.shared.signature { diff --git a/wgpu-hal/src/dx12/conv.rs b/wgpu-hal/src/dx12/conv.rs index 24d5160182..928be31781 100644 --- a/wgpu-hal/src/dx12/conv.rs +++ b/wgpu-hal/src/dx12/conv.rs @@ -1,10 +1,10 @@ use windows::Win32::Graphics::{Direct3D, Direct3D12}; pub fn map_buffer_usage_to_resource_flags( - usage: crate::BufferUses, + usage: wgt::BufferUses, ) -> Direct3D12::D3D12_RESOURCE_FLAGS { let mut flags = Direct3D12::D3D12_RESOURCE_FLAG_NONE; - if usage.contains(crate::BufferUses::STORAGE_READ_WRITE) { + if usage.contains(wgt::BufferUses::STORAGE_READ_WRITE) { flags |= Direct3D12::D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; } flags @@ -19,25 +19,25 @@ pub fn map_texture_dimension(dim: wgt::TextureDimension) -> Direct3D12::D3D12_RE } pub fn map_texture_usage_to_resource_flags( - usage: crate::TextureUses, + usage: wgt::TextureUses, ) -> 
Direct3D12::D3D12_RESOURCE_FLAGS { let mut flags = Direct3D12::D3D12_RESOURCE_FLAG_NONE; - if usage.contains(crate::TextureUses::COLOR_TARGET) { + if usage.contains(wgt::TextureUses::COLOR_TARGET) { flags |= Direct3D12::D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; } - if usage.intersects( - crate::TextureUses::DEPTH_STENCIL_READ | crate::TextureUses::DEPTH_STENCIL_WRITE, - ) { + if usage + .intersects(wgt::TextureUses::DEPTH_STENCIL_READ | wgt::TextureUses::DEPTH_STENCIL_WRITE) + { flags |= Direct3D12::D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL; - if !usage.contains(crate::TextureUses::RESOURCE) { + if !usage.contains(wgt::TextureUses::RESOURCE) { flags |= Direct3D12::D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE; } } if usage.intersects( - crate::TextureUses::STORAGE_READ_ONLY - | crate::TextureUses::STORAGE_WRITE_ONLY - | crate::TextureUses::STORAGE_READ_WRITE, + wgt::TextureUses::STORAGE_READ_ONLY + | wgt::TextureUses::STORAGE_WRITE_ONLY + | wgt::TextureUses::STORAGE_READ_WRITE, ) { flags |= Direct3D12::D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; } @@ -116,8 +116,8 @@ pub fn map_binding_type(ty: &wgt::BindingType) -> Direct3D12::D3D12_DESCRIPTOR_R } } -pub fn map_buffer_usage_to_state(usage: crate::BufferUses) -> Direct3D12::D3D12_RESOURCE_STATES { - use crate::BufferUses as Bu; +pub fn map_buffer_usage_to_state(usage: wgt::BufferUses) -> Direct3D12::D3D12_RESOURCE_STATES { + use wgt::BufferUses as Bu; let mut state = Direct3D12::D3D12_RESOURCE_STATE_COMMON; if usage.intersects(Bu::COPY_SRC) { @@ -144,12 +144,12 @@ pub fn map_buffer_usage_to_state(usage: crate::BufferUses) -> Direct3D12::D3D12_ state } -pub fn map_texture_usage_to_state(usage: crate::TextureUses) -> Direct3D12::D3D12_RESOURCE_STATES { - use crate::TextureUses as Tu; +pub fn map_texture_usage_to_state(usage: wgt::TextureUses) -> Direct3D12::D3D12_RESOURCE_STATES { + use wgt::TextureUses as Tu; let mut state = Direct3D12::D3D12_RESOURCE_STATE_COMMON; //Note: `RESOLVE_SOURCE` and `RESOLVE_DEST` are not used here //Note: `PRESENT` is the same as `COMMON` - if usage == crate::TextureUses::UNINITIALIZED { + if usage == wgt::TextureUses::UNINITIALIZED { return state; } diff --git a/wgpu-hal/src/dx12/descriptor.rs b/wgpu-hal/src/dx12/descriptor.rs index f3b7f26f25..d191d87765 100644 --- a/wgpu-hal/src/dx12/descriptor.rs +++ b/wgpu-hal/src/dx12/descriptor.rs @@ -284,6 +284,11 @@ impl CpuHeap { } pub(super) fn at(&self, index: u32) -> Direct3D12::D3D12_CPU_DESCRIPTOR_HANDLE { + debug_assert!( + index < self.total, + "Index ({index}) out of bounds {total}", + total = self.total + ); Direct3D12::D3D12_CPU_DESCRIPTOR_HANDLE { ptr: self.start.ptr + (self.handle_size * index) as usize, } diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index 5cff5af333..681ac4cec4 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -1,4 +1,5 @@ use std::{ + borrow::Cow, ffi, mem::{self, size_of, size_of_val}, num::NonZeroU32, @@ -20,7 +21,10 @@ use windows::{ use super::{conv, descriptor, D3D12Lib}; use crate::{ auxil::{self, dxgi::result::HResult}, - dx12::{borrow_optional_interface_temporarily, shader_compilation, Event}, + dx12::{ + borrow_optional_interface_temporarily, shader_compilation, DynamicStorageBufferOffsets, + Event, + }, AccelerationStructureEntries, TlasInstance, }; @@ -100,7 +104,6 @@ impl super::Device { // maximum number of CBV/SRV/UAV descriptors in heap for Tier 1 let capacity_views = limits.max_non_sampler_bindings as u64; - let capacity_samplers = 2_048; let shared = super::DeviceShared { 
zero_buffer, @@ -141,11 +144,7 @@ impl super::Device { Direct3D12::D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, capacity_views, )?, - heap_samplers: descriptor::GeneralHeap::new( - &raw, - Direct3D12::D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, - capacity_samplers, - )?, + sampler_heap: super::sampler::SamplerHeap::new(&raw, &private_caps)?, }; let mut rtv_pool = @@ -188,10 +187,6 @@ impl super::Device { raw.clone(), Direct3D12::D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, )), - sampler_pool: Mutex::new(descriptor::CpuPool::new( - raw, - Direct3D12::D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, - )), library: Arc::clone(library), #[cfg(feature = "renderdoc")] render_doc: Default::default(), @@ -280,12 +275,15 @@ impl super::Device { let needs_temp_options = stage.zero_initialize_workgroup_memory != layout.naga_options.zero_initialize_workgroup_memory - || stage.module.runtime_checks.bounds_checks != layout.naga_options.restrict_indexing; + || stage.module.runtime_checks.bounds_checks != layout.naga_options.restrict_indexing + || stage.module.runtime_checks.force_loop_bounding + != layout.naga_options.force_loop_bounding; let mut temp_options; let naga_options = if needs_temp_options { temp_options = layout.naga_options.clone(); temp_options.zero_initialize_workgroup_memory = stage.zero_initialize_workgroup_memory; temp_options.restrict_indexing = stage.module.runtime_checks.bounds_checks; + temp_options.force_loop_bounding = stage.module.runtime_checks.force_loop_bounding; &temp_options } else { &layout.naga_options @@ -404,7 +402,7 @@ impl crate::Device for super::Device { desc: &crate::BufferDescriptor, ) -> Result { let mut size = desc.size; - if desc.usage.contains(crate::BufferUses::UNIFORM) { + if desc.usage.contains(wgt::BufferUses::UNIFORM) { let align_mask = Direct3D12::D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT as u64 - 1; size = ((size - 1) | align_mask) + 1; } @@ -565,7 +563,7 @@ impl crate::Device for super::Device { texture.resource.clone(), texture.calc_subresource(desc.range.base_mip_level, desc.range.base_array_layer, 0), ), - handle_srv: if desc.usage.intersects(crate::TextureUses::RESOURCE) { + handle_srv: if desc.usage.intersects(wgt::TextureUses::RESOURCE) { match unsafe { view_desc.to_srv() } { Some(raw_desc) => { let handle = self.srv_uav_pool.lock().alloc_handle()?; @@ -584,9 +582,9 @@ impl crate::Device for super::Device { None }, handle_uav: if desc.usage.intersects( - crate::TextureUses::STORAGE_READ_ONLY - | crate::TextureUses::STORAGE_WRITE_ONLY - | crate::TextureUses::STORAGE_READ_WRITE, + wgt::TextureUses::STORAGE_READ_ONLY + | wgt::TextureUses::STORAGE_WRITE_ONLY + | wgt::TextureUses::STORAGE_READ_WRITE, ) { match unsafe { view_desc.to_uav() } { Some(raw_desc) => { @@ -606,7 +604,7 @@ impl crate::Device for super::Device { } else { None }, - handle_rtv: if desc.usage.intersects(crate::TextureUses::COLOR_TARGET) { + handle_rtv: if desc.usage.intersects(wgt::TextureUses::COLOR_TARGET) { let raw_desc = unsafe { view_desc.to_rtv() }; let handle = self.rtv_pool.lock().alloc_handle()?; unsafe { @@ -617,10 +615,7 @@ impl crate::Device for super::Device { } else { None }, - handle_dsv_ro: if desc - .usage - .intersects(crate::TextureUses::DEPTH_STENCIL_READ) - { + handle_dsv_ro: if desc.usage.intersects(wgt::TextureUses::DEPTH_STENCIL_READ) { let raw_desc = unsafe { view_desc.to_dsv(true) }; let handle = self.dsv_pool.lock().alloc_handle()?; unsafe { @@ -631,10 +626,7 @@ impl crate::Device for super::Device { } else { None }, - handle_dsv_rw: if desc - .usage - 
.intersects(crate::TextureUses::DEPTH_STENCIL_WRITE) - { + handle_dsv_rw: if desc.usage.intersects(wgt::TextureUses::DEPTH_STENCIL_WRITE) { let raw_desc = unsafe { view_desc.to_dsv(false) }; let handle = self.dsv_pool.lock().alloc_handle()?; unsafe { @@ -678,8 +670,6 @@ impl crate::Device for super::Device { &self, desc: &crate::SamplerDescriptor, ) -> Result { - let handle = self.sampler_pool.lock().alloc_handle()?; - let reduction = match desc.compare { Some(_) => Direct3D12::D3D12_FILTER_REDUCTION_TYPE_COMPARISON, None => Direct3D12::D3D12_FILTER_REDUCTION_TYPE_STANDARD, @@ -697,34 +687,39 @@ impl crate::Device for super::Device { let border_color = conv::map_border_color(desc.border_color); - unsafe { - self.raw.CreateSampler( - &Direct3D12::D3D12_SAMPLER_DESC { - Filter: filter, - AddressU: conv::map_address_mode(desc.address_modes[0]), - AddressV: conv::map_address_mode(desc.address_modes[1]), - AddressW: conv::map_address_mode(desc.address_modes[2]), - MipLODBias: 0f32, - MaxAnisotropy: desc.anisotropy_clamp as u32, - - ComparisonFunc: conv::map_comparison( - desc.compare.unwrap_or(wgt::CompareFunction::Always), - ), - BorderColor: border_color, - MinLOD: desc.lod_clamp.start, - MaxLOD: desc.lod_clamp.end, - }, - handle.raw, - ) + let raw_desc = Direct3D12::D3D12_SAMPLER_DESC { + Filter: filter, + AddressU: conv::map_address_mode(desc.address_modes[0]), + AddressV: conv::map_address_mode(desc.address_modes[1]), + AddressW: conv::map_address_mode(desc.address_modes[2]), + MipLODBias: 0f32, + MaxAnisotropy: desc.anisotropy_clamp as u32, + + ComparisonFunc: conv::map_comparison( + desc.compare.unwrap_or(wgt::CompareFunction::Always), + ), + BorderColor: border_color, + MinLOD: desc.lod_clamp.start, + MaxLOD: desc.lod_clamp.end, }; + let index = self + .shared + .sampler_heap + .create_sampler(&self.raw, raw_desc)?; + self.counters.samplers.add(1); - Ok(super::Sampler { handle }) + Ok(super::Sampler { + index, + desc: raw_desc, + }) } unsafe fn destroy_sampler(&self, sampler: super::Sampler) { - self.sampler_pool.lock().free_handle(sampler.handle); + self.shared + .sampler_heap + .destroy_sampler(sampler.desc, sampler.index); self.counters.samplers.sub(1); } @@ -763,33 +758,30 @@ impl crate::Device for super::Device { &self, desc: &crate::BindGroupLayoutDescriptor, ) -> Result { - let ( - mut num_buffer_views, - mut num_samplers, - mut num_texture_views, - mut num_acceleration_structures, - ) = (0, 0, 0, 0); + let mut num_views = 0; + let mut has_sampler_in_group = false; for entry in desc.entries.iter() { let count = entry.count.map_or(1, NonZeroU32::get); match entry.ty { wgt::BindingType::Buffer { + ty: wgt::BufferBindingType::Uniform, has_dynamic_offset: true, .. } => {} - wgt::BindingType::Buffer { .. } => num_buffer_views += count, - wgt::BindingType::Texture { .. } | wgt::BindingType::StorageTexture { .. } => { - num_texture_views += count - } - wgt::BindingType::Sampler { .. } => num_samplers += count, - wgt::BindingType::AccelerationStructure { .. } => { - num_acceleration_structures += count - } + wgt::BindingType::Buffer { .. } + | wgt::BindingType::Texture { .. } + | wgt::BindingType::StorageTexture { .. } + | wgt::BindingType::AccelerationStructure { .. } => num_views += count, + wgt::BindingType::Sampler { .. 
} => has_sampler_in_group = true, } } + if has_sampler_in_group { + num_views += 1; + } + self.counters.bind_group_layouts.add(1); - let num_views = num_buffer_views + num_texture_views + num_acceleration_structures; Ok(super::BindGroupLayout { entries: desc.entries.to_vec(), cpu_heap_views: if num_views != 0 { @@ -802,17 +794,7 @@ impl crate::Device for super::Device { } else { None }, - cpu_heap_samplers: if num_samplers != 0 { - let heap = descriptor::CpuHeap::new( - &self.raw, - Direct3D12::D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, - num_samplers, - )?; - Some(heap) - } else { - None - }, - copy_counts: vec![1; num_views.max(num_samplers) as usize], + copy_counts: vec![1; num_views as usize], }) } @@ -829,26 +811,32 @@ impl crate::Device for super::Device { // // Push Constants are implemented as root constants. // - // Each descriptor set layout will be one table entry of the root signature. + // Each bind group layout will be one table entry of the root signature. // We have the additional restriction that SRV/CBV/UAV and samplers need to be // separated, so each set layout will actually occupy up to 2 entries! // SRV/CBV/UAV tables are added to the signature first, then Sampler tables, // and finally dynamic uniform descriptors. // - // Buffers with dynamic offsets are implemented as root descriptors. + // Uniform buffers with dynamic offsets are implemented as root descriptors. // This is easier than trying to patch up the offset on the shader side. // + // Storage buffers with dynamic offsets are part of a descriptor table and + // the dynamic offsets are passed via root constants. + // // Root signature layout: // Root Constants: Parameter=0, Space=0 // ... // (bind group [0]) - Space=0 // View descriptor table, if any - // Sampler descriptor table, if any + // Sampler buffer descriptor table, if any // Root descriptors (for dynamic offset buffers) // (bind group [1]) - Space=0 // ... // (bind group [2]) - Space=0 // Special constant buffer: Space=0 + // Sampler descriptor tables: Space=0 + // SamplerState Array: Space=0, Register=0-2047 + // SamplerComparisonState Array: Space=0, Register=2048-4095 //TODO: put lower bind group indices further down the root signature. See: // https://microsoft.github.io/DirectX-Specs/d3d/ResourceBinding.html#binding-model @@ -856,12 +844,10 @@ impl crate::Device for super::Device { // on Vulkan-like layout compatibility rules. 
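To make the ordering described in the comment above concrete, here is a rough, self-contained sketch of the root parameters a layout with push constants and two bind groups might produce. The enum and the example layout are invented for illustration; the real code assembles `D3D12_ROOT_PARAMETER`s directly.

```rust
/// Toy model of the root-parameter ordering described above (illustrative only).
#[derive(Debug)]
enum RootParam {
    /// Push constants as root constants, parameter 0 (if any).
    PushConstants,
    /// CBV/SRV/UAV descriptor table for one bind group; includes the sampler
    /// index buffer SRV when the group contains samplers.
    ViewTable { group: u32 },
    /// One root CBV per dynamic uniform buffer in the group.
    DynamicUniformBuffer { group: u32 },
    /// Single root-constants parameter holding one 32-bit offset per dynamic
    /// storage buffer in the group.
    DynamicStorageBufferOffsets { group: u32, count: u32 },
    /// The special internal constants buffer.
    SpecialConstantBuffer,
    /// Single table covering the whole 2048-entry sampler heap, exposed through
    /// both the standard and the comparison register ranges.
    SamplerHeapTable,
}

fn main() {
    let params = [
        RootParam::PushConstants,
        RootParam::ViewTable { group: 0 },
        RootParam::DynamicUniformBuffer { group: 0 },
        RootParam::ViewTable { group: 1 },
        RootParam::DynamicStorageBufferOffsets { group: 1, count: 2 },
        RootParam::SpecialConstantBuffer,
        RootParam::SamplerHeapTable,
    ];
    for (root_index, p) in params.iter().enumerate() {
        println!("root parameter {root_index}: {p:?}");
    }
}
```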
let mut binding_map = hlsl::BindingMap::default(); - let (mut bind_cbv, mut bind_srv, mut bind_uav, mut bind_sampler) = ( - hlsl::BindTarget::default(), - hlsl::BindTarget::default(), - hlsl::BindTarget::default(), - hlsl::BindTarget::default(), - ); + let mut sampler_buffer_binding_map = hlsl::SamplerIndexBufferBindingMap::default(); + let mut bind_cbv = hlsl::BindTarget::default(); + let mut bind_srv = hlsl::BindTarget::default(); + let mut bind_uav = hlsl::BindTarget::default(); let mut parameters = Vec::new(); let mut push_constants_target = None; let mut root_constant_info = None; @@ -888,7 +874,7 @@ impl crate::Device for super::Device { }, ShaderVisibility: Direct3D12::D3D12_SHADER_VISIBILITY_ALL, }); - let binding = bind_cbv.clone(); + let binding = bind_cbv; bind_cbv.register += 1; root_constant_info = Some(super::RootConstantInfo { root_index: parameter_index as u32, @@ -899,22 +885,41 @@ impl crate::Device for super::Device { bind_cbv.space += 1; } + let mut dynamic_storage_buffer_offsets_targets = std::collections::BTreeMap::new(); + let mut total_dynamic_storage_buffers = 0; + // Collect the whole number of bindings we will create upfront. // It allows us to preallocate enough storage to avoid reallocation, // which could cause invalid pointers. - let total_non_dynamic_entries = desc - .bind_group_layouts - .iter() - .flat_map(|bgl| { - bgl.entries.iter().map(|entry| match entry.ty { + let mut total_non_dynamic_entries = 0_usize; + let mut sampler_in_any_bind_group = false; + for bgl in desc.bind_group_layouts { + let mut sampler_in_bind_group = false; + + for entry in &bgl.entries { + match entry.ty { wgt::BindingType::Buffer { + ty: wgt::BufferBindingType::Uniform, has_dynamic_offset: true, .. - } => 0, - _ => 1, - }) - }) - .sum(); + } => {} + wgt::BindingType::Sampler(_) => sampler_in_bind_group = true, + _ => total_non_dynamic_entries += 1, + } + } + + if sampler_in_bind_group { + // One for the sampler buffer + total_non_dynamic_entries += 1; + sampler_in_any_bind_group = true; + } + } + + if sampler_in_any_bind_group { + // Two for the sampler arrays themselves + total_non_dynamic_entries += 2; + } + let mut ranges = Vec::with_capacity(total_non_dynamic_entries); let mut bind_group_infos = @@ -923,32 +928,48 @@ impl crate::Device for super::Device { let mut info = super::BindGroupInfo { tables: super::TableTypes::empty(), base_root_index: parameters.len() as u32, - dynamic_buffers: Vec::new(), + dynamic_storage_buffer_offsets: None, }; let mut visibility_view_static = wgt::ShaderStages::empty(); - let mut visibility_view_dynamic = wgt::ShaderStages::empty(); - let mut visibility_sampler = wgt::ShaderStages::empty(); + let mut visibility_view_dynamic_uniform = wgt::ShaderStages::empty(); + let mut visibility_view_dynamic_storage = wgt::ShaderStages::empty(); for entry in bgl.entries.iter() { match entry.ty { - wgt::BindingType::Sampler { .. } => visibility_sampler |= entry.visibility, + wgt::BindingType::Sampler { .. } => { + visibility_view_static |= wgt::ShaderStages::all() + } + wgt::BindingType::Buffer { + ty: wgt::BufferBindingType::Uniform, + has_dynamic_offset: true, + .. + } => visibility_view_dynamic_uniform |= entry.visibility, wgt::BindingType::Buffer { + ty: wgt::BufferBindingType::Storage { .. }, has_dynamic_offset: true, .. 
- } => visibility_view_dynamic |= entry.visibility, + } => visibility_view_dynamic_storage |= entry.visibility, _ => visibility_view_static |= entry.visibility, } } + let mut dynamic_storage_buffers = 0; + // SRV/CBV/UAV descriptor tables - let mut range_base = ranges.len(); + let range_base = ranges.len(); for entry in bgl.entries.iter() { - let range_ty = match entry.ty { + let (range_ty, has_dynamic_offset) = match entry.ty { wgt::BindingType::Buffer { + ty, has_dynamic_offset: true, .. - } => continue, - ref other => conv::map_binding_type(other), + } => match ty { + wgt::BufferBindingType::Uniform => continue, + wgt::BufferBindingType::Storage { .. } => { + (conv::map_binding_type(&entry.ty), true) + } + }, + ref other => (conv::map_binding_type(other), false), }; let bt = match range_ty { Direct3D12::D3D12_DESCRIPTOR_RANGE_TYPE_CBV => &mut bind_cbv, @@ -958,14 +979,29 @@ impl crate::Device for super::Device { _ => todo!(), }; + let binding_array_size = entry.count.map(NonZeroU32::get); + + let dynamic_storage_buffer_offsets_index = if has_dynamic_offset { + debug_assert!( + binding_array_size.is_none(), + "binding arrays and dynamic buffers are mutually exclusive" + ); + let ret = Some(dynamic_storage_buffers); + dynamic_storage_buffers += 1; + ret + } else { + None + }; + binding_map.insert( naga::ResourceBinding { group: index as u32, binding: entry.binding, }, hlsl::BindTarget { - binding_array_size: entry.count.map(NonZeroU32::get), - ..bt.clone() + binding_array_size, + dynamic_storage_buffer_offsets_index, + ..*bt }, ); ranges.push(Direct3D12::D3D12_DESCRIPTOR_RANGE { @@ -978,50 +1014,46 @@ impl crate::Device for super::Device { }); bt.register += entry.count.map(NonZeroU32::get).unwrap_or(1); } - if ranges.len() > range_base { - let range = &ranges[range_base..]; - parameters.push(Direct3D12::D3D12_ROOT_PARAMETER { - ParameterType: Direct3D12::D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE, - Anonymous: Direct3D12::D3D12_ROOT_PARAMETER_0 { - DescriptorTable: Direct3D12::D3D12_ROOT_DESCRIPTOR_TABLE { - NumDescriptorRanges: range.len() as u32, - pDescriptorRanges: range.as_ptr(), + + let mut sampler_index_within_bind_group = 0; + for entry in bgl.entries.iter() { + if let wgt::BindingType::Sampler(_) = entry.ty { + binding_map.insert( + naga::ResourceBinding { + group: index as u32, + binding: entry.binding, }, - }, - ShaderVisibility: conv::map_visibility(visibility_view_static), - }); - info.tables |= super::TableTypes::SRV_CBV_UAV; + hlsl::BindTarget { + // Naga does not use the space field for samplers + space: 255, + register: sampler_index_within_bind_group, + binding_array_size: None, + dynamic_storage_buffer_offsets_index: None, + restrict_indexing: false, + }, + ); + sampler_index_within_bind_group += 1; + } } - // Sampler descriptor tables - range_base = ranges.len(); - for entry in bgl.entries.iter() { - let range_ty = match entry.ty { - wgt::BindingType::Sampler { .. 
} => { - Direct3D12::D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER - } - _ => continue, - }; - binding_map.insert( - naga::ResourceBinding { + if sampler_index_within_bind_group != 0 { + sampler_buffer_binding_map.insert( + hlsl::SamplerIndexBufferKey { group: index as u32, - binding: entry.binding, - }, - hlsl::BindTarget { - binding_array_size: entry.count.map(NonZeroU32::get), - ..bind_sampler.clone() }, + bind_srv, ); ranges.push(Direct3D12::D3D12_DESCRIPTOR_RANGE { - RangeType: range_ty, - NumDescriptors: entry.count.map_or(1, |count| count.get()), - BaseShaderRegister: bind_sampler.register, - RegisterSpace: bind_sampler.space as u32, + RangeType: Direct3D12::D3D12_DESCRIPTOR_RANGE_TYPE_SRV, + NumDescriptors: 1, + BaseShaderRegister: bind_srv.register, + RegisterSpace: bind_srv.space as u32, OffsetInDescriptorsFromTableStart: Direct3D12::D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND, }); - bind_sampler.register += entry.count.map(NonZeroU32::get).unwrap_or(1); + bind_srv.register += 1; } + if ranges.len() > range_base { let range = &ranges[range_base..]; parameters.push(Direct3D12::D3D12_ROOT_PARAMETER { @@ -1032,41 +1064,23 @@ impl crate::Device for super::Device { pDescriptorRanges: range.as_ptr(), }, }, - ShaderVisibility: conv::map_visibility(visibility_sampler), + ShaderVisibility: conv::map_visibility(visibility_view_static), }); - info.tables |= super::TableTypes::SAMPLERS; + info.tables |= super::TableTypes::SRV_CBV_UAV; } - // Root (dynamic) descriptor tables - let dynamic_buffers_visibility = conv::map_visibility(visibility_view_dynamic); + // Root descriptors for dynamic uniform buffers + let dynamic_buffers_visibility = conv::map_visibility(visibility_view_dynamic_uniform); for entry in bgl.entries.iter() { - let buffer_ty = match entry.ty { + match entry.ty { wgt::BindingType::Buffer { + ty: wgt::BufferBindingType::Uniform, has_dynamic_offset: true, - ty, .. 
- } => ty, + } => {} _ => continue, }; - let (kind, parameter_ty, bt) = match buffer_ty { - wgt::BufferBindingType::Uniform => ( - super::BufferViewKind::Constant, - Direct3D12::D3D12_ROOT_PARAMETER_TYPE_CBV, - &mut bind_cbv, - ), - wgt::BufferBindingType::Storage { read_only: true } => ( - super::BufferViewKind::ShaderResource, - Direct3D12::D3D12_ROOT_PARAMETER_TYPE_SRV, - &mut bind_srv, - ), - wgt::BufferBindingType::Storage { read_only: false } => ( - super::BufferViewKind::UnorderedAccess, - Direct3D12::D3D12_ROOT_PARAMETER_TYPE_UAV, - &mut bind_uav, - ), - }; - binding_map.insert( naga::ResourceBinding { group: index as u32, @@ -1074,28 +1088,121 @@ impl crate::Device for super::Device { }, hlsl::BindTarget { binding_array_size: entry.count.map(NonZeroU32::get), - ..bt.clone() + restrict_indexing: true, + ..bind_cbv }, ); - info.dynamic_buffers.push(kind); parameters.push(Direct3D12::D3D12_ROOT_PARAMETER { - ParameterType: parameter_ty, + ParameterType: Direct3D12::D3D12_ROOT_PARAMETER_TYPE_CBV, Anonymous: Direct3D12::D3D12_ROOT_PARAMETER_0 { Descriptor: Direct3D12::D3D12_ROOT_DESCRIPTOR { - ShaderRegister: bt.register, - RegisterSpace: bt.space as u32, + ShaderRegister: bind_cbv.register, + RegisterSpace: bind_cbv.space as u32, }, }, ShaderVisibility: dynamic_buffers_visibility, }); - bt.register += entry.count.map_or(1, NonZeroU32::get); + bind_cbv.register += entry.count.map_or(1, NonZeroU32::get); + } + + // Root constants for (offsets of) dynamic storage buffers + if dynamic_storage_buffers > 0 { + let parameter_index = parameters.len(); + + parameters.push(Direct3D12::D3D12_ROOT_PARAMETER { + ParameterType: Direct3D12::D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS, + Anonymous: Direct3D12::D3D12_ROOT_PARAMETER_0 { + Constants: Direct3D12::D3D12_ROOT_CONSTANTS { + ShaderRegister: bind_cbv.register, + RegisterSpace: bind_cbv.space as u32, + Num32BitValues: dynamic_storage_buffers, + }, + }, + ShaderVisibility: conv::map_visibility(visibility_view_dynamic_storage), + }); + + let binding = hlsl::OffsetsBindTarget { + space: bind_cbv.space, + register: bind_cbv.register, + size: dynamic_storage_buffers, + }; + + bind_cbv.register += 1; + + dynamic_storage_buffer_offsets_targets.insert(index as u32, binding); + info.dynamic_storage_buffer_offsets = Some(DynamicStorageBufferOffsets { + root_index: parameter_index as u32, + range: total_dynamic_storage_buffers as usize + ..total_dynamic_storage_buffers as usize + dynamic_storage_buffers as usize, + }); + total_dynamic_storage_buffers += dynamic_storage_buffers; } bind_group_infos.push(info); } + let sampler_heap_target = hlsl::SamplerHeapBindTargets { + standard_samplers: hlsl::BindTarget { + space: 0, + register: 0, + binding_array_size: None, + dynamic_storage_buffer_offsets_index: None, + restrict_indexing: false, + }, + comparison_samplers: hlsl::BindTarget { + space: 0, + register: 2048, + binding_array_size: None, + dynamic_storage_buffer_offsets_index: None, + restrict_indexing: false, + }, + }; + + let mut sampler_heap_root_index = None; + if sampler_in_any_bind_group { + // Sampler descriptor tables + // + // We bind two sampler ranges pointing to the same descriptor heap, using two different register ranges. + // + // We bind them as normal samplers in registers 0-2047 and comparison samplers in registers 2048-4095. + // Tier 2 hardware guarantees that the type of sampler only needs to match if the sampler is actually + // accessed in the shader. As such, we can bind the same array of samplers to both registers. 
+ // + // We do this because HLSL does not allow you to alias registers at all. + let range_base = ranges.len(); + // Standard samplers, registers 0-2047 + ranges.push(Direct3D12::D3D12_DESCRIPTOR_RANGE { + RangeType: Direct3D12::D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, + NumDescriptors: 2048, + BaseShaderRegister: 0, + RegisterSpace: 0, + OffsetInDescriptorsFromTableStart: 0, + }); + // Comparison samplers, registers 2048-4095 + ranges.push(Direct3D12::D3D12_DESCRIPTOR_RANGE { + RangeType: Direct3D12::D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, + NumDescriptors: 2048, + BaseShaderRegister: 2048, + RegisterSpace: 0, + OffsetInDescriptorsFromTableStart: 0, + }); + + let range = &ranges[range_base..]; + sampler_heap_root_index = Some(parameters.len() as super::RootIndex); + parameters.push(Direct3D12::D3D12_ROOT_PARAMETER { + ParameterType: Direct3D12::D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE, + Anonymous: Direct3D12::D3D12_ROOT_PARAMETER_0 { + DescriptorTable: Direct3D12::D3D12_ROOT_DESCRIPTOR_TABLE { + NumDescriptorRanges: range.len() as u32, + pDescriptorRanges: range.as_ptr(), + }, + }, + ShaderVisibility: Direct3D12::D3D12_SHADER_VISIBILITY_ALL, + }); + } + // Ensure that we didn't reallocate! debug_assert_eq!(ranges.len(), total_non_dynamic_entries); @@ -1115,7 +1222,7 @@ impl crate::Device for super::Device { }, ShaderVisibility: Direct3D12::D3D12_SHADER_VISIBILITY_ALL, // really needed for VS and CS only, }); - let binding = bind_cbv.clone(); + let binding = bind_cbv; bind_cbv.register += 1; (Some(parameter_index as u32), Some(binding)) } else { @@ -1233,6 +1340,7 @@ impl crate::Device for super::Device { total_root_elements: parameters.len() as super::RootIndex, special_constants, root_constant_info, + sampler_heap_root_index, }, bind_group_infos, naga_options: hlsl::Options { @@ -1241,8 +1349,12 @@ impl crate::Device for super::Device { fake_missing_bindings: false, special_constants_binding, push_constants_target, + dynamic_storage_buffer_offsets_targets, zero_initialize_workgroup_memory: true, restrict_indexing: true, + sampler_heap_target, + sampler_buffer_binding_map, + force_loop_bounding: true, }, }) } @@ -1269,14 +1381,6 @@ impl crate::Device for super::Device { if let Some(ref mut inner) = cpu_views { inner.stage.clear(); } - let mut cpu_samplers = desc - .layout - .cpu_heap_samplers - .as_ref() - .map(|cpu_heap| cpu_heap.inner.lock()); - if let Some(ref mut inner) = cpu_samplers { - inner.stage.clear(); - } let mut dynamic_buffers = Vec::new(); let layout_and_entry_iter = desc.entries.iter().map(|entry| { @@ -1288,26 +1392,38 @@ impl crate::Device for super::Device { .expect("internal error: no layout entry found with binding slot"); (layout, entry) }); + let mut sampler_indexes: Vec = Vec::new(); + for (layout, entry) in layout_and_entry_iter { match layout.ty { wgt::BindingType::Buffer { - has_dynamic_offset: true, + ty, + has_dynamic_offset, .. } => { - let start = entry.resource_index as usize; - let end = start + entry.count as usize; - for data in &desc.buffers[start..end] { - dynamic_buffers.push(Direct3D12::D3D12_GPU_DESCRIPTOR_HANDLE { - ptr: data.resolve_address(), - }); - } - } - wgt::BindingType::Buffer { ty, .. 
} => { let start = entry.resource_index as usize; let end = start + entry.count as usize; for data in &desc.buffers[start..end] { let gpu_address = data.resolve_address(); - let size = data.resolve_size() as u32; + let mut size = data.resolve_size() as u32; + + if has_dynamic_offset { + match ty { + wgt::BufferBindingType::Uniform => { + dynamic_buffers.push(super::DynamicBuffer::Uniform( + Direct3D12::D3D12_GPU_DESCRIPTOR_HANDLE { + ptr: data.resolve_address(), + }, + )); + continue; + } + wgt::BufferBindingType::Storage { .. } => { + size = (data.buffer.size - data.offset) as u32; + dynamic_buffers.push(super::DynamicBuffer::Storage); + } + } + } + let inner = cpu_views.as_mut().unwrap(); let cpu_index = inner.stage.len() as u32; let handle = desc.layout.cpu_heap_views.as_ref().unwrap().at(cpu_index); @@ -1392,8 +1508,8 @@ impl crate::Device for super::Device { wgt::BindingType::Sampler { .. } => { let start = entry.resource_index as usize; let end = start + entry.count as usize; - for data in &desc.samplers[start..end] { - cpu_samplers.as_mut().unwrap().stage.push(data.handle.raw); + for &data in &desc.samplers[start..end] { + sampler_indexes.push(data.index); } } wgt::BindingType::AccelerationStructure { .. } => { @@ -1426,6 +1542,92 @@ impl crate::Device for super::Device { } } + let sampler_index_buffer = if !sampler_indexes.is_empty() { + let buffer_size = (sampler_indexes.len() * size_of::()) as u64; + + let label = if let Some(label) = desc.label { + Cow::Owned(format!("{} (Internal Sampler Index Buffer)", label)) + } else { + Cow::Borrowed("Internal Sampler Index Buffer") + }; + + let buffer_desc = crate::BufferDescriptor { + label: None, + size: buffer_size, + usage: wgt::BufferUses::STORAGE_READ_ONLY | wgt::BufferUses::MAP_WRITE, + // D3D12 backend doesn't care about the memory flags + memory_flags: crate::MemoryFlags::empty(), + }; + + let raw_buffer_desc = Direct3D12::D3D12_RESOURCE_DESC { + Dimension: Direct3D12::D3D12_RESOURCE_DIMENSION_BUFFER, + Alignment: 0, + Width: buffer_size, + Height: 1, + DepthOrArraySize: 1, + MipLevels: 1, + Format: Dxgi::Common::DXGI_FORMAT_UNKNOWN, + SampleDesc: Dxgi::Common::DXGI_SAMPLE_DESC { + Count: 1, + Quality: 0, + }, + Layout: Direct3D12::D3D12_TEXTURE_LAYOUT_ROW_MAJOR, + Flags: Direct3D12::D3D12_RESOURCE_FLAG_NONE, + }; + + let (buffer, allocation) = + super::suballocation::create_buffer_resource(self, &buffer_desc, raw_buffer_desc)?; + + unsafe { buffer.SetName(&windows::core::HSTRING::from(&*label)) } + .into_device_result("SetName")?; + + let mut mapping = ptr::null_mut::(); + unsafe { buffer.Map(0, None, Some(&mut mapping)) }.into_device_result("Map")?; + + assert!(!mapping.is_null()); + assert_eq!(mapping as usize % 4, 0); + + unsafe { + ptr::copy_nonoverlapping( + sampler_indexes.as_ptr(), + mapping.cast(), + sampler_indexes.len(), + ) + }; + + // The unmapping is not needed, as all memory is coherent in d3d12, but lets be nice to our address space. 
+ unsafe { buffer.Unmap(0, None) }; + + let srv_desc = Direct3D12::D3D12_SHADER_RESOURCE_VIEW_DESC { + Format: Dxgi::Common::DXGI_FORMAT_UNKNOWN, + ViewDimension: Direct3D12::D3D12_SRV_DIMENSION_BUFFER, + Anonymous: Direct3D12::D3D12_SHADER_RESOURCE_VIEW_DESC_0 { + Buffer: Direct3D12::D3D12_BUFFER_SRV { + FirstElement: 0, + NumElements: sampler_indexes.len() as u32, + StructureByteStride: 4, + Flags: Direct3D12::D3D12_BUFFER_SRV_FLAG_NONE, + }, + }, + Shader4ComponentMapping: Direct3D12::D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING, + }; + + let inner = cpu_views.as_mut().unwrap(); + let cpu_index = inner.stage.len() as u32; + let srv = desc.layout.cpu_heap_views.as_ref().unwrap().at(cpu_index); + + unsafe { + self.raw + .CreateShaderResourceView(&buffer, Some(&srv_desc), srv) + }; + + cpu_views.as_mut().unwrap().stage.push(srv); + + Some(super::SamplerIndexBuffer { buffer, allocation }) + } else { + None + }; + let handle_views = match cpu_views { Some(inner) => { let dual = unsafe { @@ -1440,26 +1642,12 @@ impl crate::Device for super::Device { } None => None, }; - let handle_samplers = match cpu_samplers { - Some(inner) => { - let dual = unsafe { - descriptor::upload( - &self.raw, - &inner, - &self.shared.heap_samplers, - &desc.layout.copy_counts, - ) - }?; - Some(dual) - } - None => None, - }; self.counters.bind_groups.add(1); Ok(super::BindGroup { handle_views, - handle_samplers, + sampler_index_buffer, dynamic_buffers, }) } @@ -1468,8 +1656,14 @@ impl crate::Device for super::Device { if let Some(dual) = group.handle_views { self.shared.heap_views.free_slice(dual); } - if let Some(dual) = group.handle_samplers { - self.shared.heap_samplers.free_slice(dual); + + if let Some(sampler_buffer) = group.sampler_index_buffer { + // Make sure the buffer is dropped before the allocation + drop(sampler_buffer.buffer); + + if let Some(allocation) = sampler_buffer.allocation { + super::suballocation::free_buffer_allocation(self, allocation, &self.mem_allocator); + } } self.counters.bind_groups.sub(1); @@ -2122,7 +2316,7 @@ impl crate::Device for super::Device { const MAX_U24: u32 = (1u32 << 24u32) - 1u32; let temp = Direct3D12::D3D12_RAYTRACING_INSTANCE_DESC { Transform: instance.transform, - _bitfield1: (instance.custom_index & MAX_U24) | (u32::from(instance.mask) << 24), + _bitfield1: (instance.custom_data & MAX_U24) | (u32::from(instance.mask) << 24), _bitfield2: 0, AccelerationStructure: instance.blas_address, }; diff --git a/wgpu-hal/src/dx12/instance.rs b/wgpu-hal/src/dx12/instance.rs index 64b32b772d..3a87e2617a 100644 --- a/wgpu-hal/src/dx12/instance.rs +++ b/wgpu-hal/src/dx12/instance.rs @@ -67,7 +67,7 @@ impl crate::Instance for super::Instance { } // Initialize DXC shader compiler - let dxc_container = match desc.dx12_shader_compiler.clone() { + let dxc_container = match desc.backend_options.dx12.shader_compiler.clone() { wgt::Dx12Compiler::DynamicDxc { dxil_path, dxc_path, diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index 809d53c74d..d5d6843c39 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -13,13 +13,52 @@ and destination states match, and they are for storage sync. For now, all resources are created with "committed" memory. +## Sampler Descriptor Management + +At most one descriptor heap of each type can be bound at once. This +means that the descriptors from all bind groups need to be present +in the same heap, and they need to be contiguous within that heap. 
+This is not a problem for the SRV/CBV/UAV heap as it can be sized into +the millions of entries. However, the sampler heap is limited to 2048 entries. + +In order to work around this limitation, we refer to samplers indirectly by index. +The entire sampler heap is bound at once and a buffer containing all sampler indexes +for that bind group is bound. The shader then uses the index to look up the sampler +in the heap. To help visualize this, the generated HLSL looks like this: + +```wgsl +@group(0) @binding(2) var myLinearSampler: sampler; +@group(1) @binding(1) var myAnisoSampler: sampler; +@group(1) @binding(4) var myCompSampler: sampler; +``` + +```cpp +// These bindings alias the same descriptors. Depending on the type, the shader will use the correct one. +SamplerState nagaSamplerHeap[2048]: register(s0, space0); +SamplerComparisonState nagaComparisonSamplerHeap[2048]: register(s2048, space0); + +StructuredBuffer nagaGroup0SamplerIndexArray : register(t0, space0); +StructuredBuffer nagaGroup1SamplerIndexArray : register(t1, space0); + +// Indexes into group 0 index array +static const SamplerState myLinearSampler = nagaSamplerHeap[nagaGroup0SamplerIndexArray[0]]; + +// Indexes into group 1 index array +static const SamplerState myAnisoSampler = nagaSamplerHeap[nagaGroup1SamplerIndexArray[0]]; +static const SamplerComparisonState myCompSampler = nagaComparisonSamplerHeap[nagaGroup1SamplerIndexArray[1]]; +``` + +Without this transform we would need a separate set of sampler descriptors for each unique combination of samplers +in a bind group. This results in a lot of duplication and makes it easy to hit the 2048 limit. With the transform +the limit is merely 2048 unique samplers in existence, which is much more reasonable. + ## Resource binding See ['Device::create_pipeline_layout`] documentation for the structure of the root signature corresponding to WebGPU pipeline layout. Binding groups is mostly straightforward, with one big caveat: -all bindings have to be reset whenever the pipeline layout changes. +all bindings have to be reset whenever the root signature changes. This is the rule of D3D12, and we can do nothing to help it. We detect this change at both [`crate::CommandEncoder::set_bind_group`] @@ -39,6 +78,7 @@ mod conv; mod descriptor; mod device; mod instance; +mod sampler; mod shader_compilation; mod suballocation; mod types; @@ -518,6 +558,7 @@ struct PrivateCapabilities { casting_fully_typed_format_supported: bool, suballocation_supported: bool, shader_model: naga::back::hlsl::ShaderModel, + max_sampler_descriptor_heap_size: u32, } #[derive(Default)] @@ -575,7 +616,7 @@ struct DeviceShared { zero_buffer: Direct3D12::ID3D12Resource, cmd_signatures: CommandSignatures, heap_views: descriptor::GeneralHeap, - heap_samplers: descriptor::GeneralHeap, + sampler_heap: sampler::SamplerHeap, } unsafe impl Send for DeviceShared {} @@ -591,7 +632,6 @@ pub struct Device { rtv_pool: Mutex, dsv_pool: Mutex, srv_uav_pool: Mutex, - sampler_pool: Mutex, // library library: Arc, #[cfg(feature = "renderdoc")] @@ -645,7 +685,7 @@ struct PassResolve { format: Dxgi::Common::DXGI_FORMAT, } -#[derive(Clone, Copy)] +#[derive(Clone, Copy, Debug)] enum RootElement { Empty, Constant, @@ -659,11 +699,20 @@ enum RootElement { }, /// Descriptor table. Table(Direct3D12::D3D12_GPU_DESCRIPTOR_HANDLE), - /// Descriptor for a buffer that has dynamic offset. - DynamicOffsetBuffer { - kind: BufferViewKind, + /// Descriptor for a uniform buffer that has dynamic offset.
+ DynamicUniformBuffer { address: Direct3D12::D3D12_GPU_DESCRIPTOR_HANDLE, }, + /// Descriptor table referring to the entire sampler heap. + SamplerHeap, + /// Root constants for dynamic offsets. + /// + /// start..end is the range of values in [`PassState::dynamic_storage_buffer_offsets`] + /// that will be used to update the root constants. + DynamicOffsetsBuffer { + start: usize, + end: usize, + }, } #[derive(Clone, Copy)] @@ -679,6 +728,7 @@ struct PassState { layout: PipelineLayoutShared, root_elements: [RootElement; MAX_ROOT_ELEMENTS], constant_data: [u32; MAX_ROOT_ELEMENTS], + dynamic_storage_buffer_offsets: Vec, dirty_root_elements: u64, vertex_buffers: [Direct3D12::D3D12_VERTEX_BUFFER_VIEW; crate::MAX_VERTEX_BUFFERS], dirty_vertex_buffers: usize, @@ -700,9 +750,11 @@ impl PassState { total_root_elements: 0, special_constants: None, root_constant_info: None, + sampler_heap_root_index: None, }, root_elements: [RootElement::Empty; MAX_ROOT_ELEMENTS], constant_data: [0; MAX_ROOT_ELEMENTS], + dynamic_storage_buffer_offsets: Vec::new(), dirty_root_elements: 0, vertex_buffers: [Default::default(); crate::MAX_VERTEX_BUFFERS], dirty_vertex_buffers: 0, @@ -853,7 +905,8 @@ unsafe impl Sync for TextureView {} #[derive(Debug)] pub struct Sampler { - handle: descriptor::Handle, + index: sampler::SamplerIndex, + desc: Direct3D12::D3D12_SAMPLER_DESC, } impl crate::DynSampler for Sampler {} @@ -893,24 +946,28 @@ pub struct BindGroupLayout { /// Sorted list of entries. entries: Vec, cpu_heap_views: Option, - cpu_heap_samplers: Option, copy_counts: Vec, // all 1's } impl crate::DynBindGroupLayout for BindGroupLayout {} #[derive(Debug, Clone, Copy)] -enum BufferViewKind { - Constant, - ShaderResource, - UnorderedAccess, +enum DynamicBuffer { + Uniform(Direct3D12::D3D12_GPU_DESCRIPTOR_HANDLE), + Storage, +} + +#[derive(Debug)] +struct SamplerIndexBuffer { + buffer: Direct3D12::ID3D12Resource, + allocation: Option, } #[derive(Debug)] pub struct BindGroup { handle_views: Option, - handle_samplers: Option, - dynamic_buffers: Vec, + sampler_index_buffer: Option, + dynamic_buffers: Vec, } impl crate::DynBindGroup for BindGroup {} @@ -930,7 +987,7 @@ type RootIndex = u32; struct BindGroupInfo { base_root_index: RootIndex, tables: TableTypes, - dynamic_buffers: Vec, + dynamic_storage_buffer_offsets: Option, } #[derive(Debug, Clone)] @@ -939,12 +996,19 @@ struct RootConstantInfo { range: std::ops::Range, } +#[derive(Debug, Clone)] +struct DynamicStorageBufferOffsets { + root_index: RootIndex, + range: std::ops::Range, +} + #[derive(Debug, Clone)] struct PipelineLayoutShared { signature: Option, total_root_elements: RootIndex, special_constants: Option, root_constant_info: Option, + sampler_heap_root_index: Option, } unsafe impl Send for PipelineLayoutShared {} diff --git a/wgpu-hal/src/dx12/sampler.rs b/wgpu-hal/src/dx12/sampler.rs new file mode 100644 index 0000000000..f49555391f --- /dev/null +++ b/wgpu-hal/src/dx12/sampler.rs @@ -0,0 +1,251 @@ +//! Sampler management for DX12. +//! +//! Nearly identical to the Vulkan sampler cache, with added descriptor heap management. + +use hashbrown::{hash_map::Entry, HashMap}; + +use ordered_float::OrderedFloat; +use parking_lot::Mutex; +use windows::Win32::Graphics::Direct3D12::*; + +use crate::dx12::HResult; + +/// The index of a sampler in the global sampler heap. +/// +/// This is a type-safe, transparent wrapper around a u32. 
+#[repr(transparent)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub(crate) struct SamplerIndex(u32); + +/// [`D3D12_SAMPLER_DESC`] is not hashable, so we wrap it in a newtype that is. +/// +/// We use [`OrderedFloat`] to allow for floating point values to be compared and +/// hashed in a defined way. +#[derive(Debug, Copy, Clone)] +struct HashableSamplerDesc(D3D12_SAMPLER_DESC); + +impl PartialEq for HashableSamplerDesc { + fn eq(&self, other: &Self) -> bool { + self.0.Filter == other.0.Filter + && self.0.AddressU == other.0.AddressU + && self.0.AddressV == other.0.AddressV + && self.0.AddressW == other.0.AddressW + && OrderedFloat(self.0.MipLODBias) == OrderedFloat(other.0.MipLODBias) + && self.0.MaxAnisotropy == other.0.MaxAnisotropy + && self.0.ComparisonFunc == other.0.ComparisonFunc + && self.0.BorderColor.map(OrderedFloat) == other.0.BorderColor.map(OrderedFloat) + && OrderedFloat(self.0.MinLOD) == OrderedFloat(other.0.MinLOD) + && OrderedFloat(self.0.MaxLOD) == OrderedFloat(other.0.MaxLOD) + } +} + +impl Eq for HashableSamplerDesc {} + +impl std::hash::Hash for HashableSamplerDesc { + fn hash(&self, state: &mut H) { + self.0.Filter.0.hash(state); + self.0.AddressU.0.hash(state); + self.0.AddressV.0.hash(state); + self.0.AddressW.0.hash(state); + OrderedFloat(self.0.MipLODBias).hash(state); + self.0.MaxAnisotropy.hash(state); + self.0.ComparisonFunc.0.hash(state); + self.0.BorderColor.map(OrderedFloat).hash(state); + OrderedFloat(self.0.MinLOD).hash(state); + OrderedFloat(self.0.MaxLOD).hash(state); + } +} + +/// Entry in the sampler cache. +struct CacheEntry { + index: SamplerIndex, + ref_count: u32, +} + +/// Container for the mutable management state of the sampler heap. +/// +/// We have this separated, using interior mutability, to allow for the outside world +/// to access the heap directly without needing to take the lock. +pub(crate) struct SamplerHeapState { + /// Mapping from the sampler description to the index within the heap and the refcount. + mapping: HashMap, + /// List of free sampler indices. + freelist: Vec, +} + +/// Global sampler heap for the device. +/// +/// As D3D12 only allows 2048 samplers to be in a single heap, we need to cache +/// samplers aggressively and refer to them in shaders by index. +pub(crate) struct SamplerHeap { + /// Mutable management state of the sampler heap. + state: Mutex, + + /// The heap itself. + heap: ID3D12DescriptorHeap, + /// The CPU-side handle to the first descriptor in the heap. + /// + /// Both the CPU and GPU handles point to the same descriptor, just in + /// different contexts. + heap_cpu_start_handle: D3D12_CPU_DESCRIPTOR_HANDLE, + /// The GPU-side handle to the first descriptor in the heap. + /// + /// Both the CPU and GPU handles point to the same descriptor, just in + /// different contexts. + heap_gpu_start_handle: D3D12_GPU_DESCRIPTOR_HANDLE, + + /// This is the device-specific size of sampler descriptors. + descriptor_stride: u32, +} + +impl SamplerHeap { + pub fn new( + device: &ID3D12Device, + private_caps: &super::PrivateCapabilities, + ) -> Result { + profiling::scope!("SamplerHeap::new"); + + // WARP can report this as 2M or more. We clamp it to 64k to be safe. 
+ const SAMPLER_HEAP_SIZE_CLAMP: u32 = 64 * 1024; + + let max_unique_samplers = private_caps + .max_sampler_descriptor_heap_size + .min(SAMPLER_HEAP_SIZE_CLAMP); + + let desc = D3D12_DESCRIPTOR_HEAP_DESC { + Type: D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, + NumDescriptors: max_unique_samplers, + Flags: D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE, + NodeMask: 0, + }; + let heap = unsafe { device.CreateDescriptorHeap::(&desc) } + .into_device_result("Failed to create global GPU-Visible Sampler Descriptor Heap")?; + + let heap_cpu_start_handle = unsafe { heap.GetCPUDescriptorHandleForHeapStart() }; + let heap_gpu_start_handle = unsafe { heap.GetGPUDescriptorHandleForHeapStart() }; + + let descriptor_stride = + unsafe { device.GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER) }; + + Ok(Self { + state: Mutex::new(SamplerHeapState { + mapping: HashMap::new(), + // Reverse so that samplers get allocated starting from zero. + freelist: (0..max_unique_samplers).map(SamplerIndex).rev().collect(), + }), + heap, + heap_cpu_start_handle, + heap_gpu_start_handle, + descriptor_stride, + }) + } + + /// Returns a reference to the raw descriptor heap. + pub fn heap(&self) -> &ID3D12DescriptorHeap { + &self.heap + } + + /// Returns a reference the handle to be bound to the descriptor table. + pub fn gpu_descriptor_table(&self) -> D3D12_GPU_DESCRIPTOR_HANDLE { + self.heap_gpu_start_handle + } + + /// Add a sampler with the given description to the heap. + /// + /// If the sampler already exists, the refcount is incremented and the existing index is returned. + /// + /// If the sampler does not exist, a new sampler is created and the index is returned. + /// + /// If the heap is full, an error is returned. + pub fn create_sampler( + &self, + device: &ID3D12Device, + desc: D3D12_SAMPLER_DESC, + ) -> Result { + profiling::scope!("SamplerHeap::create_sampler"); + + let hashable_desc = HashableSamplerDesc(desc); + + // Eagarly dereference the lock to allow split borrows. + let state = &mut *self.state.lock(); + + // Lookup the sampler in the mapping. + match state.mapping.entry(hashable_desc) { + Entry::Occupied(occupied_entry) => { + // We have found a match, so increment the refcount and return the index. + let entry = occupied_entry.into_mut(); + entry.ref_count += 1; + Ok(entry.index) + } + Entry::Vacant(vacant_entry) => { + // We need to create a new sampler. + + // Try to get a new index from the freelist. + let Some(index) = state.freelist.pop() else { + // If the freelist is empty, we have hit the maximum number of samplers. + log::error!("There is no more room in the global sampler heap for more unique samplers. Your device supports a maximum of {} unique samplers.", state.mapping.len()); + return Err(crate::DeviceError::OutOfMemory); + }; + + // Compute the CPU side handle for the new sampler. + let handle = D3D12_CPU_DESCRIPTOR_HANDLE { + ptr: self.heap_cpu_start_handle.ptr + + self.descriptor_stride as usize * index.0 as usize, + }; + + unsafe { + device.CreateSampler(&desc, handle); + } + + // Insert the new sampler into the mapping. + vacant_entry.insert(CacheEntry { + index, + ref_count: 1, + }); + + Ok(index) + } + } + } + + /// Decrement the refcount of the sampler with the given description. + /// + /// If the refcount reaches zero, the sampler is destroyed and the index is returned to the freelist. + /// + /// The provided index is checked against the index of the sampler with the given description, ensuring + /// that there isn't a clerical error from the caller. 
+ pub fn destroy_sampler(&self, desc: D3D12_SAMPLER_DESC, provided_index: SamplerIndex) { + profiling::scope!("SamplerHeap::destroy_sampler"); + + // Eagarly dereference the lock to allow split borrows. + let state = &mut *self.state.lock(); + + // Get the index of the sampler to destroy. + let Entry::Occupied(mut hash_map_entry) = state.mapping.entry(HashableSamplerDesc(desc)) + else { + log::error!( + "Tried to destroy a sampler that doesn't exist. Sampler description: {:#?}", + desc + ); + return; + }; + let cache_entry = hash_map_entry.get_mut(); + + // Ensure that the provided index matches the index of the sampler to destroy. + assert_eq!( + cache_entry.index, provided_index, + "Mismatched sampler index, this is an implementation bug" + ); + + // Decrement the refcount of the sampler. + cache_entry.ref_count -= 1; + + // If we are the last reference, remove the sampler from the mapping and return the index to the freelist. + // + // As samplers only exist as descriptors in the heap, there is nothing needed to be done to destroy the sampler. + if cache_entry.ref_count == 0 { + state.freelist.push(cache_entry.index); + hash_map_entry.remove(); + } + } +} diff --git a/wgpu-hal/src/dx12/suballocation.rs b/wgpu-hal/src/dx12/suballocation.rs index 2b0cbf8a47..89ce6c5d0b 100644 --- a/wgpu-hal/src/dx12/suballocation.rs +++ b/wgpu-hal/src/dx12/suballocation.rs @@ -53,8 +53,8 @@ pub(crate) fn create_buffer_resource( desc: &crate::BufferDescriptor, raw_desc: Direct3D12::D3D12_RESOURCE_DESC, ) -> Result<(Direct3D12::ID3D12Resource, Option), crate::DeviceError> { - let is_cpu_read = desc.usage.contains(crate::BufferUses::MAP_READ); - let is_cpu_write = desc.usage.contains(crate::BufferUses::MAP_WRITE); + let is_cpu_read = desc.usage.contains(wgt::BufferUses::MAP_READ); + let is_cpu_write = desc.usage.contains(wgt::BufferUses::MAP_WRITE); // Workaround for Intel Xe drivers if !device.private_caps.suballocation_supported { @@ -289,8 +289,8 @@ pub(crate) fn create_committed_buffer_resource( desc: &crate::BufferDescriptor, raw_desc: Direct3D12::D3D12_RESOURCE_DESC, ) -> Result { - let is_cpu_read = desc.usage.contains(crate::BufferUses::MAP_READ); - let is_cpu_write = desc.usage.contains(crate::BufferUses::MAP_WRITE); + let is_cpu_read = desc.usage.contains(wgt::BufferUses::MAP_READ); + let is_cpu_write = desc.usage.contains(wgt::BufferUses::MAP_WRITE); let heap_properties = Direct3D12::D3D12_HEAP_PROPERTIES { Type: Direct3D12::D3D12_HEAP_TYPE_CUSTOM, diff --git a/wgpu-hal/src/dx12/view.rs b/wgpu-hal/src/dx12/view.rs index 8162b012af..fa8f2a4d61 100644 --- a/wgpu-hal/src/dx12/view.rs +++ b/wgpu-hal/src/dx12/view.rs @@ -184,8 +184,8 @@ impl ViewDescriptor { desc.ViewDimension = Direct3D12::D3D12_UAV_DIMENSION_TEXTURE3D; desc.Anonymous.Texture3D = Direct3D12::D3D12_TEX3D_UAV { MipSlice: self.mip_level_base, - FirstWSlice: self.array_layer_base, - WSize: self.array_layer_count, + FirstWSlice: 0, + WSize: u32::MAX, } } wgt::TextureViewDimension::Cube | wgt::TextureViewDimension::CubeArray => { diff --git a/wgpu-hal/src/dynamic/command.rs b/wgpu-hal/src/dynamic/command.rs index 4ecdf74723..8fb65fa161 100644 --- a/wgpu-hal/src/dynamic/command.rs +++ b/wgpu-hal/src/dynamic/command.rs @@ -4,7 +4,7 @@ use crate::{ AccelerationStructureBarrier, Api, Attachment, BufferBarrier, BufferBinding, BufferCopy, BufferTextureCopy, BuildAccelerationStructureDescriptor, ColorAttachment, CommandEncoder, ComputePassDescriptor, DepthStencilAttachment, DeviceError, Label, MemoryRange, - PassTimestampWrites, Rect, 
RenderPassDescriptor, TextureBarrier, TextureCopy, TextureUses, + PassTimestampWrites, Rect, RenderPassDescriptor, TextureBarrier, TextureCopy, }; use super::{ @@ -37,7 +37,7 @@ pub trait DynCommandEncoder: DynResource + std::fmt::Debug { unsafe fn copy_texture_to_texture( &mut self, src: &dyn DynTexture, - src_usage: TextureUses, + src_usage: wgt::TextureUses, dst: &dyn DynTexture, regions: &[TextureCopy], ); @@ -52,7 +52,7 @@ pub trait DynCommandEncoder: DynResource + std::fmt::Debug { unsafe fn copy_texture_to_buffer( &mut self, src: &dyn DynTexture, - src_usage: TextureUses, + src_usage: wgt::TextureUses, dst: &dyn DynBuffer, regions: &[BufferTextureCopy], ); @@ -240,7 +240,7 @@ impl DynCommandEncoder for C { unsafe fn copy_texture_to_texture( &mut self, src: &dyn DynTexture, - src_usage: TextureUses, + src_usage: wgt::TextureUses, dst: &dyn DynTexture, regions: &[TextureCopy], ) { @@ -267,7 +267,7 @@ impl DynCommandEncoder for C { unsafe fn copy_texture_to_buffer( &mut self, src: &dyn DynTexture, - src_usage: TextureUses, + src_usage: wgt::TextureUses, dst: &dyn DynBuffer, regions: &[BufferTextureCopy], ) { diff --git a/wgpu-hal/src/empty.rs b/wgpu-hal/src/empty.rs index dd1e183ed2..d3d3908ac3 100644 --- a/wgpu-hal/src/empty.rs +++ b/wgpu-hal/src/empty.rs @@ -358,7 +358,7 @@ impl crate::CommandEncoder for Encoder { unsafe fn copy_texture_to_texture( &mut self, src: &Resource, - src_usage: crate::TextureUses, + src_usage: wgt::TextureUses, dst: &Resource, regions: T, ) { @@ -369,7 +369,7 @@ impl crate::CommandEncoder for Encoder { unsafe fn copy_texture_to_buffer( &mut self, src: &Resource, - src_usage: crate::TextureUses, + src_usage: wgt::TextureUses, dst: &Resource, regions: T, ) { diff --git a/wgpu-hal/src/gles/adapter.rs b/wgpu-hal/src/gles/adapter.rs index 67ff20ff19..85bae59457 100644 --- a/wgpu-hal/src/gles/adapter.rs +++ b/wgpu-hal/src/gles/adapter.rs @@ -192,6 +192,7 @@ impl super::Adapter { pub(super) unsafe fn expose( context: super::AdapterContext, + backend_options: wgt::GlBackendOptions, ) -> Option> { let gl = context.lock(); let extensions = gl.supported_extensions(); @@ -824,6 +825,7 @@ impl super::Adapter { private_caps, workarounds, features, + options: backend_options, shading_language_version, next_shader_id: Default::default(), program_cache: Default::default(), @@ -1211,7 +1213,7 @@ impl crate::Adapter for super::Adapter { composite_alpha_modes: vec![wgt::CompositeAlphaMode::Opaque], //TODO maximum_frame_latency: 2..=2, //TODO, unused currently current_extent: None, - usage: crate::TextureUses::COLOR_TARGET, + usage: wgt::TextureUses::COLOR_TARGET, }) } else { None diff --git a/wgpu-hal/src/gles/command.rs b/wgpu-hal/src/gles/command.rs index 0f495b4834..b706c116e8 100644 --- a/wgpu-hal/src/gles/command.rs +++ b/wgpu-hal/src/gles/command.rs @@ -287,11 +287,7 @@ impl crate::CommandEncoder for super::CommandEncoder { } for bar in barriers { // GLES only synchronizes storage -> anything explicitly - if !bar - .usage - .from - .contains(crate::BufferUses::STORAGE_READ_WRITE) - { + if !bar.usage.from.contains(wgt::BufferUses::STORAGE_READ_WRITE) { continue; } self.cmd_buffer @@ -311,13 +307,13 @@ impl crate::CommandEncoder for super::CommandEncoder { return; } - let mut combined_usage = crate::TextureUses::empty(); + let mut combined_usage = wgt::TextureUses::empty(); for bar in barriers { // GLES only synchronizes storage -> anything explicitly if !bar .usage .from - .contains(crate::TextureUses::STORAGE_READ_WRITE) + .contains(wgt::TextureUses::STORAGE_READ_WRITE) 
{ continue; } @@ -393,7 +389,7 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe fn copy_texture_to_texture( &mut self, src: &super::Texture, - _src_usage: crate::TextureUses, + _src_usage: wgt::TextureUses, dst: &super::Texture, regions: T, ) where @@ -439,7 +435,7 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe fn copy_texture_to_buffer( &mut self, src: &super::Texture, - _src_usage: crate::TextureUses, + _src_usage: wgt::TextureUses, dst: &super::Buffer, regions: T, ) where diff --git a/wgpu-hal/src/gles/device.rs b/wgpu-hal/src/gles/device.rs index 75cd74b18f..9ea4d5c397 100644 --- a/wgpu-hal/src/gles/device.rs +++ b/wgpu-hal/src/gles/device.rs @@ -1,6 +1,7 @@ use super::{conv, PrivateCapabilities}; use crate::auxil::map_naga_stage; use glow::HasContext; +use naga::FastHashMap; use std::{ cmp::max, convert::TryInto, @@ -8,7 +9,7 @@ use std::{ sync::{Arc, Mutex}, }; -use crate::{AtomicFenceValue, TlasInstance}; +use crate::TlasInstance; use arrayvec::ArrayVec; use std::sync::atomic::Ordering; @@ -16,7 +17,7 @@ type ShaderStage<'a> = ( naga::ShaderStage, &'a crate::ProgrammableStage<'a, super::ShaderModule>, ); -type NameBindingMap = rustc_hash::FxHashMap; +type NameBindingMap = FastHashMap; struct CompilationContext<'a> { layout: &'a super::PipelineLayout, @@ -505,7 +506,7 @@ impl crate::Device for super::Device { &self, desc: &crate::BufferDescriptor, ) -> Result { - let target = if desc.usage.contains(crate::BufferUses::INDEX) { + let target = if desc.usage.contains(wgt::BufferUses::INDEX) { glow::ELEMENT_ARRAY_BUFFER } else { glow::ARRAY_BUFFER @@ -520,7 +521,7 @@ impl crate::Device for super::Device { .private_caps .contains(PrivateCapabilities::BUFFER_ALLOCATION); - if emulate_map && desc.usage.intersects(crate::BufferUses::MAP_WRITE) { + if emulate_map && desc.usage.intersects(wgt::BufferUses::MAP_WRITE) { return Ok(super::Buffer { raw: None, target, @@ -533,7 +534,7 @@ impl crate::Device for super::Device { let gl = &self.shared.context.lock(); - let target = if desc.usage.contains(crate::BufferUses::INDEX) { + let target = if desc.usage.contains(wgt::BufferUses::INDEX) { glow::ELEMENT_ARRAY_BUFFER } else { glow::ARRAY_BUFFER @@ -541,16 +542,16 @@ impl crate::Device for super::Device { let is_host_visible = desc .usage - .intersects(crate::BufferUses::MAP_READ | crate::BufferUses::MAP_WRITE); + .intersects(wgt::BufferUses::MAP_READ | wgt::BufferUses::MAP_WRITE); let is_coherent = desc .memory_flags .contains(crate::MemoryFlags::PREFER_COHERENT); let mut map_flags = 0; - if desc.usage.contains(crate::BufferUses::MAP_READ) { + if desc.usage.contains(wgt::BufferUses::MAP_READ) { map_flags |= glow::MAP_READ_BIT; } - if desc.usage.contains(crate::BufferUses::MAP_WRITE) { + if desc.usage.contains(wgt::BufferUses::MAP_WRITE) { map_flags |= glow::MAP_WRITE_BIT; } @@ -573,14 +574,14 @@ impl crate::Device for super::Device { } } // TODO: may also be required for other calls involving `buffer_sub_data_u8_slice` (e.g. 
copy buffer to buffer and clear buffer) - if desc.usage.intersects(crate::BufferUses::QUERY_RESOLVE) { + if desc.usage.intersects(wgt::BufferUses::QUERY_RESOLVE) { map_flags |= glow::DYNAMIC_STORAGE_BIT; } unsafe { gl.buffer_storage(target, raw_size, None, map_flags) }; } else { assert!(!is_coherent); let usage = if is_host_visible { - if desc.usage.contains(crate::BufferUses::MAP_READ) { + if desc.usage.contains(wgt::BufferUses::MAP_READ) { glow::STREAM_READ } else { glow::DYNAMIC_DRAW @@ -596,7 +597,7 @@ impl crate::Device for super::Device { unsafe { gl.bind_buffer(target, None) }; - if !is_coherent && desc.usage.contains(crate::BufferUses::MAP_WRITE) { + if !is_coherent && desc.usage.contains(wgt::BufferUses::MAP_WRITE) { map_flags |= glow::MAP_FLUSH_EXPLICIT_BIT; } //TODO: do we need `glow::MAP_UNSYNCHRONIZED_BIT`? @@ -613,7 +614,7 @@ impl crate::Device for super::Device { } } - let data = if emulate_map && desc.usage.contains(crate::BufferUses::MAP_READ) { + let data = if emulate_map && desc.usage.contains(wgt::BufferUses::MAP_READ) { Some(Arc::new(Mutex::new(vec![0; desc.size as usize]))) } else { None @@ -727,9 +728,9 @@ impl crate::Device for super::Device { ) -> Result { let gl = &self.shared.context.lock(); - let render_usage = crate::TextureUses::COLOR_TARGET - | crate::TextureUses::DEPTH_STENCIL_WRITE - | crate::TextureUses::DEPTH_STENCIL_READ; + let render_usage = wgt::TextureUses::COLOR_TARGET + | wgt::TextureUses::DEPTH_STENCIL_WRITE + | wgt::TextureUses::DEPTH_STENCIL_READ; let format_desc = self.shared.describe_texture_format(desc.format); let inner = if render_usage.contains(desc.usage) @@ -1523,17 +1524,12 @@ impl crate::Device for super::Device { unsafe fn create_fence(&self) -> Result { self.counters.fences.add(1); - Ok(super::Fence { - last_completed: AtomicFenceValue::new(0), - pending: Vec::new(), - }) + Ok(super::Fence::new(&self.shared.options)) } unsafe fn destroy_fence(&self, fence: super::Fence) { let gl = &self.shared.context.lock(); - for (_, sync) in fence.pending { - unsafe { gl.delete_sync(sync) }; - } + fence.destroy(gl); self.counters.fences.sub(1); } @@ -1550,44 +1546,21 @@ impl crate::Device for super::Device { wait_value: crate::FenceValue, timeout_ms: u32, ) -> Result { - if fence.last_completed.load(Ordering::Relaxed) < wait_value { - let gl = &self.shared.context.lock(); - // MAX_CLIENT_WAIT_TIMEOUT_WEBGL is: - // - 1s in Gecko https://searchfox.org/mozilla-central/rev/754074e05178e017ef6c3d8e30428ffa8f1b794d/dom/canvas/WebGLTypes.h#1386 - // - 0 in WebKit https://github.com/WebKit/WebKit/blob/4ef90d4672ca50267c0971b85db403d9684508ea/Source/WebCore/html/canvas/WebGL2RenderingContext.cpp#L110 - // - 0 in Chromium https://source.chromium.org/chromium/chromium/src/+/main:third_party/blink/renderer/modules/webgl/webgl2_rendering_context_base.cc;l=112;drc=a3cb0ac4c71ec04abfeaed199e5d63230eca2551 - let timeout_ns = if cfg!(any(webgl, Emscripten)) { - 0 - } else { - (timeout_ms as u64 * 1_000_000).min(!0u32 as u64) - }; - if let Some(&(_, sync)) = fence - .pending - .iter() - .find(|&&(value, _)| value >= wait_value) - { - let signalled = match unsafe { - gl.client_wait_sync(sync, glow::SYNC_FLUSH_COMMANDS_BIT, timeout_ns as i32) - } { - // for some reason firefox returns WAIT_FAILED, to investigate - #[cfg(any(webgl, Emscripten))] - glow::WAIT_FAILED => { - log::warn!("wait failed!"); - false - } - glow::TIMEOUT_EXPIRED => false, - glow::CONDITION_SATISFIED | glow::ALREADY_SIGNALED => true, - _ => return Err(crate::DeviceError::Lost), - }; - if 
signalled { - fence - .last_completed - .fetch_max(wait_value, Ordering::Relaxed); - } - return Ok(signalled); - } + if fence.satisfied(wait_value) { + return Ok(true); } - Ok(true) + + let gl = &self.shared.context.lock(); + // MAX_CLIENT_WAIT_TIMEOUT_WEBGL is: + // - 1s in Gecko https://searchfox.org/mozilla-central/rev/754074e05178e017ef6c3d8e30428ffa8f1b794d/dom/canvas/WebGLTypes.h#1386 + // - 0 in WebKit https://github.com/WebKit/WebKit/blob/4ef90d4672ca50267c0971b85db403d9684508ea/Source/WebCore/html/canvas/WebGL2RenderingContext.cpp#L110 + // - 0 in Chromium https://source.chromium.org/chromium/chromium/src/+/main:third_party/blink/renderer/modules/webgl/webgl2_rendering_context_base.cc;l=112;drc=a3cb0ac4c71ec04abfeaed199e5d63230eca2551 + let timeout_ns = if cfg!(any(webgl, Emscripten)) { + 0 + } else { + (timeout_ms as u64 * 1_000_000).min(!0u32 as u64) + }; + fence.wait(gl, wait_value, timeout_ns) } unsafe fn start_capture(&self) -> bool { diff --git a/wgpu-hal/src/gles/egl.rs b/wgpu-hal/src/gles/egl.rs index 24d5a125b4..b7d9868609 100644 --- a/wgpu-hal/src/gles/egl.rs +++ b/wgpu-hal/src/gles/egl.rs @@ -1,10 +1,9 @@ use glow::HasContext; +use hashbrown::HashMap; use once_cell::sync::Lazy; use parking_lot::{MappedMutexGuard, Mutex, MutexGuard, RwLock}; -use std::{ - collections::HashMap, ffi, mem::ManuallyDrop, os::raw, ptr, rc::Rc, sync::Arc, time::Duration, -}; +use std::{ffi, mem::ManuallyDrop, os::raw, ptr, rc::Rc, sync::Arc, time::Duration}; /// The amount of time to wait while trying to obtain a lock to the adapter context const CONTEXT_LOCK_TIMEOUT_SECS: u64 = 1; @@ -738,6 +737,7 @@ struct WindowSystemInterface { pub struct Instance { wsi: WindowSystemInterface, flags: wgt::InstanceFlags, + options: wgt::GlBackendOptions, inner: Mutex, } @@ -922,7 +922,12 @@ impl crate::Instance for Instance { unsafe { (function)(Some(egl_debug_proc), attributes.as_ptr()) }; } - let inner = Inner::create(desc.flags, egl, display, desc.gles_minor_version)?; + let inner = Inner::create( + desc.flags, + egl, + display, + desc.backend_options.gl.gles_minor_version, + )?; Ok(Instance { wsi: WindowSystemInterface { @@ -930,6 +935,7 @@ impl crate::Instance for Instance { kind: wsi_kind, }, flags: desc.flags, + options: desc.backend_options.gl.clone(), inner: Mutex::new(inner), }) } @@ -1089,10 +1095,13 @@ impl crate::Instance for Instance { inner.egl.unmake_current(); unsafe { - super::Adapter::expose(AdapterContext { - glow: Mutex::new(gl), - egl: Some(inner.egl.clone()), - }) + super::Adapter::expose( + AdapterContext { + glow: Mutex::new(gl), + egl: Some(inner.egl.clone()), + }, + self.options.clone(), + ) } .into_iter() .collect() @@ -1111,13 +1120,17 @@ impl super::Adapter { /// dropping any objects returned from this adapter. 
pub unsafe fn new_external( fun: impl FnMut(&str) -> *const ffi::c_void, + options: wgt::GlBackendOptions, ) -> Option> { let context = unsafe { glow::Context::from_loader_function(fun) }; unsafe { - Self::expose(AdapterContext { - glow: Mutex::new(ManuallyDrop::new(context)), - egl: None, - }) + Self::expose( + AdapterContext { + glow: Mutex::new(ManuallyDrop::new(context)), + egl: None, + }, + options, + ) } } diff --git a/wgpu-hal/src/gles/fence.rs b/wgpu-hal/src/gles/fence.rs new file mode 100644 index 0000000000..d87e0ad742 --- /dev/null +++ b/wgpu-hal/src/gles/fence.rs @@ -0,0 +1,167 @@ +use std::sync::atomic::Ordering; + +use glow::HasContext; + +use crate::AtomicFenceValue; + +#[derive(Debug, Copy, Clone)] +struct GLFence { + sync: glow::Fence, + value: crate::FenceValue, +} + +#[derive(Debug)] +pub struct Fence { + last_completed: AtomicFenceValue, + pending: Vec, + fence_mode: wgt::GlFenceBehavior, +} + +impl crate::DynFence for Fence {} + +#[cfg(send_sync)] +unsafe impl Send for Fence {} +#[cfg(send_sync)] +unsafe impl Sync for Fence {} + +impl Fence { + pub fn new(options: &wgt::GlBackendOptions) -> Self { + Self { + last_completed: AtomicFenceValue::new(0), + pending: Vec::new(), + fence_mode: options.short_circuit_fences, + } + } + + pub fn signal( + &mut self, + gl: &glow::Context, + value: crate::FenceValue, + ) -> Result<(), crate::DeviceError> { + if self.fence_mode.is_auto_finish() { + *self.last_completed.get_mut() = value; + return Ok(()); + } + + let sync = unsafe { gl.fence_sync(glow::SYNC_GPU_COMMANDS_COMPLETE, 0) } + .map_err(|_| crate::DeviceError::OutOfMemory)?; + self.pending.push(GLFence { sync, value }); + + Ok(()) + } + + pub fn satisfied(&self, value: crate::FenceValue) -> bool { + self.last_completed.load(Ordering::Acquire) >= value + } + + pub fn get_latest(&self, gl: &glow::Context) -> crate::FenceValue { + let mut max_value = self.last_completed.load(Ordering::Acquire); + + if self.fence_mode.is_auto_finish() { + return max_value; + } + + for gl_fence in self.pending.iter() { + if gl_fence.value <= max_value { + // We already know this was good, no need to check again + continue; + } + let status = unsafe { gl.get_sync_status(gl_fence.sync) }; + if status == glow::SIGNALED { + max_value = gl_fence.value; + } else { + // Anything after the first unsignalled is guaranteed to also be unsignalled + break; + } + } + + // Track the latest value, to save ourselves some querying later + self.last_completed.fetch_max(max_value, Ordering::AcqRel); + + max_value + } + + pub fn maintain(&mut self, gl: &glow::Context) { + if self.fence_mode.is_auto_finish() { + return; + } + + let latest = self.get_latest(gl); + for &gl_fence in self.pending.iter() { + if gl_fence.value <= latest { + unsafe { + gl.delete_sync(gl_fence.sync); + } + } + } + self.pending.retain(|&gl_fence| gl_fence.value > latest); + } + + pub fn wait( + &self, + gl: &glow::Context, + wait_value: crate::FenceValue, + timeout_ns: u64, + ) -> Result { + let last_completed = self.last_completed.load(Ordering::Acquire); + + if self.fence_mode.is_auto_finish() { + return Ok(last_completed >= wait_value); + } + + // We already know this fence has been signalled to that value. Return signalled. 
+ if last_completed >= wait_value { + return Ok(true); + } + + // Find a matching fence + let gl_fence = self + .pending + .iter() + // Greater or equal as an abundance of caution, but there should be one fence per value + .find(|gl_fence| gl_fence.value >= wait_value); + + let Some(gl_fence) = gl_fence else { + log::warn!("Tried to wait for {wait_value} but that value has not been signalled yet"); + return Ok(false); + }; + + // We should have found a fence with the exact value. + debug_assert_eq!(gl_fence.value, wait_value); + + let status = unsafe { + gl.client_wait_sync( + gl_fence.sync, + glow::SYNC_FLUSH_COMMANDS_BIT, + timeout_ns as i32, + ) + }; + + let signalled = match status { + glow::ALREADY_SIGNALED | glow::CONDITION_SATISFIED => true, + glow::TIMEOUT_EXPIRED | glow::WAIT_FAILED => false, + _ => { + log::warn!("Unexpected result from client_wait_sync: {status}"); + false + } + }; + + if signalled { + self.last_completed.fetch_max(wait_value, Ordering::AcqRel); + } + + Ok(signalled) + } + + pub fn destroy(self, gl: &glow::Context) { + if self.fence_mode.is_auto_finish() { + return; + } + + for gl_fence in self.pending { + unsafe { + gl.delete_sync(gl_fence.sync); + } + } + } +} diff --git a/wgpu-hal/src/gles/mod.rs b/wgpu-hal/src/gles/mod.rs index 2b693a2dd8..478f2c433c 100644 --- a/wgpu-hal/src/gles/mod.rs +++ b/wgpu-hal/src/gles/mod.rs @@ -95,10 +95,13 @@ mod adapter; mod command; mod conv; mod device; +mod fence; mod queue; use crate::{CopyExtent, TextureDescriptor}; +pub use fence::Fence; + #[cfg(not(any(windows, webgl)))] pub use self::egl::{AdapterContext, AdapterContextLock}; #[cfg(not(any(windows, webgl)))] @@ -120,7 +123,7 @@ use glow::HasContext; use naga::FastHashMap; use parking_lot::Mutex; -use std::sync::atomic::{AtomicU32, AtomicU8, Ordering}; +use std::sync::atomic::{AtomicU32, AtomicU8}; use std::{fmt, ops::Range, sync::Arc}; #[derive(Clone, Debug)] @@ -272,6 +275,7 @@ struct AdapterShared { private_caps: PrivateCapabilities, features: wgt::Features, workarounds: Workarounds, + options: wgt::GlBackendOptions, shading_language_version: naga::back::glsl::Version, next_shader_id: AtomicU32, program_cache: Mutex, @@ -732,67 +736,6 @@ pub struct QuerySet { impl crate::DynQuerySet for QuerySet {} -#[derive(Debug)] -pub struct Fence { - last_completed: crate::AtomicFenceValue, - pending: Vec<(crate::FenceValue, glow::Fence)>, -} - -impl crate::DynFence for Fence {} - -#[cfg(any( - not(target_arch = "wasm32"), - all( - feature = "fragile-send-sync-non-atomic-wasm", - not(target_feature = "atomics") - ) -))] -unsafe impl Send for Fence {} -#[cfg(any( - not(target_arch = "wasm32"), - all( - feature = "fragile-send-sync-non-atomic-wasm", - not(target_feature = "atomics") - ) -))] -unsafe impl Sync for Fence {} - -impl Fence { - fn get_latest(&self, gl: &glow::Context) -> crate::FenceValue { - let mut max_value = self.last_completed.load(Ordering::Relaxed); - for &(value, sync) in self.pending.iter() { - if value <= max_value { - // We already know this was good, no need to check again - continue; - } - let status = unsafe { gl.get_sync_status(sync) }; - if status == glow::SIGNALED { - max_value = value; - } else { - // Anything after the first unsignalled is guaranteed to also be unsignalled - break; - } - } - - // Track the latest value, to save ourselves some querying later - self.last_completed.fetch_max(max_value, Ordering::Relaxed); - - max_value - } - - fn maintain(&mut self, gl: &glow::Context) { - let latest = self.get_latest(gl); - for &(value, sync) in 
self.pending.iter() { - if value <= latest { - unsafe { - gl.delete_sync(sync); - } - } - } - self.pending.retain(|&(value, _)| value > latest); - } -} - #[derive(Debug)] pub struct AccelerationStructure; @@ -979,8 +922,8 @@ enum Command { // It is also more efficient to emit a single command instead of two for // this. ClearDepthAndStencil(f32, u32), - BufferBarrier(glow::Buffer, crate::BufferUses), - TextureBarrier(crate::TextureUses), + BufferBarrier(glow::Buffer, wgt::BufferUses), + TextureBarrier(wgt::TextureUses), SetViewport { rect: crate::Rect, depth: Range, diff --git a/wgpu-hal/src/gles/queue.rs b/wgpu-hal/src/gles/queue.rs index 8896aa4ed0..1be46ceed2 100644 --- a/wgpu-hal/src/gles/queue.rs +++ b/wgpu-hal/src/gles/queue.rs @@ -1197,35 +1197,35 @@ impl super::Queue { } C::BufferBarrier(raw, usage) => { let mut flags = 0; - if usage.contains(crate::BufferUses::VERTEX) { + if usage.contains(wgt::BufferUses::VERTEX) { flags |= glow::VERTEX_ATTRIB_ARRAY_BARRIER_BIT; unsafe { gl.bind_buffer(glow::ARRAY_BUFFER, Some(raw)) }; unsafe { gl.vertex_attrib_pointer_f32(0, 1, glow::BYTE, true, 0, 0) }; } - if usage.contains(crate::BufferUses::INDEX) { + if usage.contains(wgt::BufferUses::INDEX) { flags |= glow::ELEMENT_ARRAY_BARRIER_BIT; unsafe { gl.bind_buffer(glow::ELEMENT_ARRAY_BUFFER, Some(raw)) }; } - if usage.contains(crate::BufferUses::UNIFORM) { + if usage.contains(wgt::BufferUses::UNIFORM) { flags |= glow::UNIFORM_BARRIER_BIT; } - if usage.contains(crate::BufferUses::INDIRECT) { + if usage.contains(wgt::BufferUses::INDIRECT) { flags |= glow::COMMAND_BARRIER_BIT; unsafe { gl.bind_buffer(glow::DRAW_INDIRECT_BUFFER, Some(raw)) }; } - if usage.contains(crate::BufferUses::COPY_SRC) { + if usage.contains(wgt::BufferUses::COPY_SRC) { flags |= glow::PIXEL_BUFFER_BARRIER_BIT; unsafe { gl.bind_buffer(glow::PIXEL_UNPACK_BUFFER, Some(raw)) }; } - if usage.contains(crate::BufferUses::COPY_DST) { + if usage.contains(wgt::BufferUses::COPY_DST) { flags |= glow::PIXEL_BUFFER_BARRIER_BIT; unsafe { gl.bind_buffer(glow::PIXEL_PACK_BUFFER, Some(raw)) }; } - if usage.intersects(crate::BufferUses::MAP_READ | crate::BufferUses::MAP_WRITE) { + if usage.intersects(wgt::BufferUses::MAP_READ | wgt::BufferUses::MAP_WRITE) { flags |= glow::BUFFER_UPDATE_BARRIER_BIT; } if usage.intersects( - crate::BufferUses::STORAGE_READ_ONLY | crate::BufferUses::STORAGE_READ_WRITE, + wgt::BufferUses::STORAGE_READ_ONLY | wgt::BufferUses::STORAGE_READ_WRITE, ) { flags |= glow::SHADER_STORAGE_BARRIER_BIT; } @@ -1233,23 +1233,23 @@ impl super::Queue { } C::TextureBarrier(usage) => { let mut flags = 0; - if usage.contains(crate::TextureUses::RESOURCE) { + if usage.contains(wgt::TextureUses::RESOURCE) { flags |= glow::TEXTURE_FETCH_BARRIER_BIT; } if usage.intersects( - crate::TextureUses::STORAGE_READ_ONLY - | crate::TextureUses::STORAGE_WRITE_ONLY - | crate::TextureUses::STORAGE_READ_WRITE, + wgt::TextureUses::STORAGE_READ_ONLY + | wgt::TextureUses::STORAGE_WRITE_ONLY + | wgt::TextureUses::STORAGE_READ_WRITE, ) { flags |= glow::SHADER_IMAGE_ACCESS_BARRIER_BIT; } - if usage.contains(crate::TextureUses::COPY_DST) { + if usage.contains(wgt::TextureUses::COPY_DST) { flags |= glow::TEXTURE_UPDATE_BARRIER_BIT; } if usage.intersects( - crate::TextureUses::COLOR_TARGET - | crate::TextureUses::DEPTH_STENCIL_READ - | crate::TextureUses::DEPTH_STENCIL_WRITE, + wgt::TextureUses::COLOR_TARGET + | wgt::TextureUses::DEPTH_STENCIL_READ + | wgt::TextureUses::DEPTH_STENCIL_WRITE, ) { flags |= glow::FRAMEBUFFER_BARRIER_BIT; } @@ -1856,9 +1856,12 @@ 
impl crate::Queue for super::Queue { } signal_fence.maintain(gl); - let sync = unsafe { gl.fence_sync(glow::SYNC_GPU_COMMANDS_COMPLETE, 0) } - .map_err(|_| crate::DeviceError::OutOfMemory)?; - signal_fence.pending.push((signal_value, sync)); + signal_fence.signal(gl, signal_value)?; + + // This is extremely important. If we don't flush, the above fences may never + // be signaled, particularly in headless contexts. Headed contexts will + // often flush every so often, but headless contexts may not. + unsafe { gl.flush() }; Ok(()) } diff --git a/wgpu-hal/src/gles/web.rs b/wgpu-hal/src/gles/web.rs index 06bd871247..be02db8619 100644 --- a/wgpu-hal/src/gles/web.rs +++ b/wgpu-hal/src/gles/web.rs @@ -25,7 +25,9 @@ impl AdapterContext { } #[derive(Debug)] -pub struct Instance; +pub struct Instance { + options: wgt::GlBackendOptions, +} impl Instance { pub fn create_surface_from_canvas( @@ -110,9 +112,11 @@ unsafe impl Send for Instance {} impl crate::Instance for Instance { type A = super::Api; - unsafe fn init(_desc: &crate::InstanceDescriptor) -> Result { + unsafe fn init(desc: &crate::InstanceDescriptor) -> Result { profiling::scope!("Init OpenGL (WebGL) Backend"); - Ok(Instance) + Ok(Instance { + options: desc.backend_options.gl.clone(), + }) } unsafe fn enumerate_adapters( @@ -123,10 +127,13 @@ impl crate::Instance for Instance { let gl = glow::Context::from_webgl2_context(surface_hint.webgl2_context.clone()); unsafe { - super::Adapter::expose(AdapterContext { - glow_context: gl, - webgl2_context: surface_hint.webgl2_context.clone(), - }) + super::Adapter::expose( + AdapterContext { + glow_context: gl, + webgl2_context: surface_hint.webgl2_context.clone(), + }, + self.options.clone(), + ) } .into_iter() .collect() diff --git a/wgpu-hal/src/gles/wgl.rs b/wgpu-hal/src/gles/wgl.rs index 2d6c91aee0..41759f3817 100644 --- a/wgpu-hal/src/gles/wgl.rs +++ b/wgpu-hal/src/gles/wgl.rs @@ -1,5 +1,4 @@ use std::{ - collections::HashSet, ffi::{c_void, CStr, CString}, mem::{self, size_of, size_of_val, ManuallyDrop}, os::raw::c_int, @@ -17,6 +16,7 @@ use glutin_wgl_sys::wgl_extra::{ Wgl, CONTEXT_CORE_PROFILE_BIT_ARB, CONTEXT_DEBUG_BIT_ARB, CONTEXT_FLAGS_ARB, CONTEXT_PROFILE_MASK_ARB, }; +use hashbrown::HashSet; use once_cell::sync::Lazy; use parking_lot::{Mutex, MutexGuard, RwLock}; use raw_window_handle::{RawDisplayHandle, RawWindowHandle}; @@ -177,6 +177,7 @@ unsafe impl Sync for Inner {} pub struct Instance { srgb_capable: bool, + options: wgt::GlBackendOptions, inner: Arc>, } @@ -542,6 +543,7 @@ impl crate::Instance for Instance { gl, context: Some(context), })), + options: desc.backend_options.gl.clone(), srgb_capable, }) } @@ -573,9 +575,12 @@ impl crate::Instance for Instance { _surface_hint: Option<&Surface>, ) -> Vec> { unsafe { - super::Adapter::expose(AdapterContext { - inner: self.inner.clone(), - }) + super::Adapter::expose( + AdapterContext { + inner: self.inner.clone(), + }, + self.options.clone(), + ) } .into_iter() .collect() @@ -594,16 +599,20 @@ impl super::Adapter { /// dropping any objects returned from this adapter. 
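For embedders that drive the GL backend through an externally created context, the `wgt::GlBackendOptions` value threaded through `expose` and `new_external` in this change now has to come from the caller. A hypothetical caller-side sketch follows; the `load_gl_symbol` loader is made up for illustration, only `gles_minor_version` and `short_circuit_fences` are visible in this diff, and the `Automatic` variant plus a `Default` impl for the remaining fields are assumed.

```rust
use std::ffi;
use wgpu_types as wgt;

// Hypothetical symbol loader supplied by the embedder (not part of this change).
fn load_gl_symbol(name: &str) -> *const ffi::c_void {
    let _ = name;
    std::ptr::null()
}

fn expose_external_adapter() -> Option<wgpu_hal::ExposedAdapter<wgpu_hal::gles::Api>> {
    // The field name follows its use elsewhere in this diff; other fields are left
    // at their assumed defaults (including `short_circuit_fences`).
    let options = wgt::GlBackendOptions {
        gles_minor_version: wgt::Gles3MinorVersion::Automatic,
        ..Default::default()
    };

    // SAFETY: see the safety requirements documented on `Adapter::new_external`.
    unsafe { wgpu_hal::gles::Adapter::new_external(load_gl_symbol, options) }
}
```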
pub unsafe fn new_external( fun: impl FnMut(&str) -> *const c_void, + options: wgt::GlBackendOptions, ) -> Option> { let context = unsafe { glow::Context::from_loader_function(fun) }; unsafe { - Self::expose(AdapterContext { - inner: Arc::new(Mutex::new(Inner { - gl: ManuallyDrop::new(context), - device: create_instance_device().ok()?, - context: None, - })), - }) + Self::expose( + AdapterContext { + inner: Arc::new(Mutex::new(Inner { + gl: ManuallyDrop::new(context), + device: create_instance_device().ok()?, + context: None, + })), + }, + options, + ) } } diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index 4cc0ef80bd..fd5f272b61 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -234,6 +234,8 @@ unused_qualifications )] +extern crate wgpu_types as wgt; + /// DirectX12 API internals. #[cfg(dx12)] pub mod dx12; @@ -708,7 +710,7 @@ pub trait Device: WasmNotSendSync { /// Creates a new buffer. /// - /// The initial usage is `BufferUses::empty()`. + /// The initial usage is `wgt::BufferUses::empty()`. unsafe fn create_buffer( &self, desc: &BufferDescriptor, @@ -774,8 +776,8 @@ pub trait Device: WasmNotSendSync { /// - The returned [`BufferMapping::ptr`] must not be used after a call to /// [`Device::unmap_buffer`]. /// - /// [`MAP_READ`]: BufferUses::MAP_READ - /// [`MAP_WRITE`]: BufferUses::MAP_WRITE + /// [`MAP_READ`]: wgt::BufferUses::MAP_READ + /// [`MAP_WRITE`]: wgt::BufferUses::MAP_WRITE unsafe fn map_buffer( &self, buffer: &::Buffer, @@ -813,7 +815,7 @@ pub trait Device: WasmNotSendSync { /// Creates a new texture. /// - /// The initial usage for all subresources is `TextureUses::UNINITIALIZED`. + /// The initial usage for all subresources is `wgt::TextureUses::UNINITIALIZED`. unsafe fn create_texture( &self, desc: &TextureDescriptor, @@ -1198,7 +1200,7 @@ pub trait CommandEncoder: WasmNotSendSync + fmt::Debug { /// Copy from an external image to an internal texture. /// Works with a single array layer. - /// Note: `dst` current usage has to be `TextureUses::COPY_DST`. + /// Note: `dst` current usage has to be `wgt::TextureUses::COPY_DST`. /// Note: the copy extent is in physical size (rounded to the block size) #[cfg(webgl)] unsafe fn copy_external_image_to_texture( @@ -1212,12 +1214,12 @@ pub trait CommandEncoder: WasmNotSendSync + fmt::Debug { /// Copy from one texture to another. /// Works with a single array layer. - /// Note: `dst` current usage has to be `TextureUses::COPY_DST`. + /// Note: `dst` current usage has to be `wgt::TextureUses::COPY_DST`. /// Note: the copy extent is in physical size (rounded to the block size) unsafe fn copy_texture_to_texture( &mut self, src: &::Texture, - src_usage: TextureUses, + src_usage: wgt::TextureUses, dst: &::Texture, regions: T, ) where @@ -1225,7 +1227,7 @@ pub trait CommandEncoder: WasmNotSendSync + fmt::Debug { /// Copy from buffer to texture. /// Works with a single array layer. - /// Note: `dst` current usage has to be `TextureUses::COPY_DST`. + /// Note: `dst` current usage has to be `wgt::TextureUses::COPY_DST`. /// Note: the copy extent is in physical size (rounded to the block size) unsafe fn copy_buffer_to_texture( &mut self, @@ -1241,7 +1243,7 @@ pub trait CommandEncoder: WasmNotSendSync + fmt::Debug { unsafe fn copy_texture_to_buffer( &mut self, src: &::Texture, - src_usage: TextureUses, + src_usage: wgt::TextureUses, dst: &::Buffer, regions: T, ) where @@ -1660,99 +1662,11 @@ bitflags!( } ); -bitflags::bitflags! { - /// Similar to `wgt::BufferUsages` but for internal use. 
- #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] - pub struct BufferUses: u16 { - /// The argument to a read-only mapping. - const MAP_READ = 1 << 0; - /// The argument to a write-only mapping. - const MAP_WRITE = 1 << 1; - /// The source of a hardware copy. - const COPY_SRC = 1 << 2; - /// The destination of a hardware copy. - const COPY_DST = 1 << 3; - /// The index buffer used for drawing. - const INDEX = 1 << 4; - /// A vertex buffer used for drawing. - const VERTEX = 1 << 5; - /// A uniform buffer bound in a bind group. - const UNIFORM = 1 << 6; - /// A read-only storage buffer used in a bind group. - const STORAGE_READ_ONLY = 1 << 7; - /// A read-write buffer used in a bind group. - const STORAGE_READ_WRITE = 1 << 8; - /// The indirect or count buffer in a indirect draw or dispatch. - const INDIRECT = 1 << 9; - /// A buffer used to store query results. - const QUERY_RESOLVE = 1 << 10; - const ACCELERATION_STRUCTURE_SCRATCH = 1 << 11; - const BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT = 1 << 12; - const TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT = 1 << 13; - /// The combination of states that a buffer may be in _at the same time_. - const INCLUSIVE = Self::MAP_READ.bits() | Self::COPY_SRC.bits() | - Self::INDEX.bits() | Self::VERTEX.bits() | Self::UNIFORM.bits() | - Self::STORAGE_READ_ONLY.bits() | Self::INDIRECT.bits() | Self::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT.bits() | Self::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT.bits(); - /// The combination of states that a buffer must exclusively be in. - const EXCLUSIVE = Self::MAP_WRITE.bits() | Self::COPY_DST.bits() | Self::STORAGE_READ_WRITE.bits() | Self::ACCELERATION_STRUCTURE_SCRATCH.bits(); - /// The combination of all usages that the are guaranteed to be be ordered by the hardware. - /// If a usage is ordered, then if the buffer state doesn't change between draw calls, there - /// are no barriers needed for synchronization. - const ORDERED = Self::INCLUSIVE.bits() | Self::MAP_WRITE.bits(); - } -} - -bitflags::bitflags! { - /// Similar to `wgt::TextureUsages` but for internal use. - #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] - pub struct TextureUses: u16 { - /// The texture is in unknown state. - const UNINITIALIZED = 1 << 0; - /// Ready to present image to the surface. - const PRESENT = 1 << 1; - /// The source of a hardware copy. - const COPY_SRC = 1 << 2; - /// The destination of a hardware copy. - const COPY_DST = 1 << 3; - /// Read-only sampled or fetched resource. - const RESOURCE = 1 << 4; - /// The color target of a renderpass. - const COLOR_TARGET = 1 << 5; - /// Read-only depth stencil usage. - const DEPTH_STENCIL_READ = 1 << 6; - /// Read-write depth stencil usage - const DEPTH_STENCIL_WRITE = 1 << 7; - /// Read-only storage texture usage. Corresponds to a UAV in d3d, so is exclusive, despite being read only. - const STORAGE_READ_ONLY = 1 << 8; - /// Write-only storage texture usage. - const STORAGE_WRITE_ONLY = 1 << 9; - /// Read-write storage texture usage. - const STORAGE_READ_WRITE = 1 << 10; - /// Image atomic enabled storage - const STORAGE_ATOMIC = 1 << 11; - /// The combination of states that a texture may be in _at the same time_. - const INCLUSIVE = Self::COPY_SRC.bits() | Self::RESOURCE.bits() | Self::DEPTH_STENCIL_READ.bits(); - /// The combination of states that a texture must exclusively be in. 
- const EXCLUSIVE = Self::COPY_DST.bits() | Self::COLOR_TARGET.bits() | Self::DEPTH_STENCIL_WRITE.bits() | Self::STORAGE_READ_ONLY.bits() | Self::STORAGE_WRITE_ONLY.bits() | Self::STORAGE_READ_WRITE.bits() | Self::STORAGE_ATOMIC.bits() | Self::PRESENT.bits(); - /// The combination of all usages that the are guaranteed to be be ordered by the hardware. - /// If a usage is ordered, then if the texture state doesn't change between draw calls, there - /// are no barriers needed for synchronization. - const ORDERED = Self::INCLUSIVE.bits() | Self::COLOR_TARGET.bits() | Self::DEPTH_STENCIL_WRITE.bits() | Self::STORAGE_READ_ONLY.bits(); - - /// Flag used by the wgpu-core texture tracker to say a texture is in different states for every sub-resource - const COMPLEX = 1 << 12; - /// Flag used by the wgpu-core texture tracker to say that the tracker does not know the state of the sub-resource. - /// This is different from UNINITIALIZED as that says the tracker does know, but the texture has not been initialized. - const UNKNOWN = 1 << 13; - } -} - #[derive(Clone, Debug)] pub struct InstanceDescriptor<'a> { pub name: &'a str, pub flags: wgt::InstanceFlags, - pub dx12_shader_compiler: wgt::Dx12Compiler, - pub gles_minor_version: wgt::Gles3MinorVersion, + pub backend_options: wgt::BackendOptions, } #[derive(Clone, Debug)] @@ -1826,8 +1740,8 @@ pub struct SurfaceCapabilities { /// Supported texture usage flags. /// - /// Must have at least `TextureUses::COLOR_TARGET` - pub usage: TextureUses, + /// Must have at least `wgt::TextureUses::COLOR_TARGET` + pub usage: wgt::TextureUses, /// List of supported V-sync modes. /// @@ -1865,7 +1779,7 @@ pub struct BufferMapping { pub struct BufferDescriptor<'a> { pub label: Label<'a>, pub size: wgt::BufferAddress, - pub usage: BufferUses, + pub usage: wgt::BufferUses, pub memory_flags: MemoryFlags, } @@ -1877,7 +1791,7 @@ pub struct TextureDescriptor<'a> { pub sample_count: u32, pub dimension: wgt::TextureDimension, pub format: wgt::TextureFormat, - pub usage: TextureUses, + pub usage: wgt::TextureUses, pub memory_flags: MemoryFlags, /// Allows views of this texture to have a different format /// than the texture does. @@ -1916,7 +1830,7 @@ pub struct TextureViewDescriptor<'a> { pub label: Label<'a>, pub format: wgt::TextureFormat, pub dimension: wgt::TextureViewDimension, - pub usage: TextureUses, + pub usage: wgt::TextureUses, pub range: wgt::ImageSubresourceRange, } @@ -2028,7 +1942,7 @@ impl<'a, T: DynBuffer + ?Sized> Clone for BufferBinding<'a, T> { #[derive(Debug)] pub struct TextureBinding<'a, T: DynTextureView + ?Sized> { pub view: &'a T, - pub usage: TextureUses, + pub usage: wgt::TextureUses, } impl<'a, T: DynTextureView + ?Sized> Clone for TextureBinding<'a, T> { @@ -2230,7 +2144,7 @@ pub struct SurfaceConfiguration { /// `SurfaceCapabilities::extents` range. pub extent: wgt::Extent3d, /// Allowed usage of surface textures, - pub usage: TextureUses, + pub usage: wgt::TextureUses, /// Allows views of swapchain texture to have a different format /// than the texture does. 
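Since `InstanceDescriptor` now carries a single `backend_options: wgt::BackendOptions` instead of the separate `dx12_shader_compiler` and `gles_minor_version` fields, callers of `Instance::init` need a small migration. The sketch below is hypothetical: the `.gl`/`.dx12` sub-structs mirror the accesses made elsewhere in this diff, while the `Default` impls and the exact remaining field set are assumptions.

```rust
use wgpu_types as wgt;

fn instance_descriptor() -> wgpu_hal::InstanceDescriptor<'static> {
    // Previously these were two separate fields on `InstanceDescriptor`
    // (`dx12_shader_compiler`, `gles_minor_version`); they now live under
    // one `BackendOptions` value.
    let mut backend_options = wgt::BackendOptions::default();
    backend_options.dx12.shader_compiler = wgt::Dx12Compiler::Fxc;
    backend_options.gl.gles_minor_version = wgt::Gles3MinorVersion::Automatic;

    wgpu_hal::InstanceDescriptor {
        name: "example",
        flags: wgt::InstanceFlags::empty(),
        backend_options,
    }
}
```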
pub view_formats: Vec, @@ -2253,14 +2167,14 @@ pub struct StateTransition { #[derive(Debug, Clone)] pub struct BufferBarrier<'a, B: DynBuffer + ?Sized> { pub buffer: &'a B, - pub usage: StateTransition, + pub usage: StateTransition, } #[derive(Debug, Clone)] pub struct TextureBarrier<'a, T: DynTexture + ?Sized> { pub texture: &'a T, pub range: wgt::ImageSubresourceRange, - pub usage: StateTransition, + pub usage: StateTransition, } #[derive(Clone, Copy, Debug)] @@ -2306,7 +2220,7 @@ pub struct Attachment<'a, T: DynTextureView + ?Sized> { pub view: &'a T, /// Contains either a single mutating usage as a target, /// or a valid combination of read-only usages. - pub usage: TextureUses, + pub usage: wgt::TextureUses, } #[derive(Clone, Debug)] @@ -2530,7 +2444,7 @@ pub struct AccelerationStructureBarrier { #[derive(Debug, Copy, Clone)] pub struct TlasInstance { pub transform: [f32; 12], - pub custom_index: u32, + pub custom_data: u32, pub mask: u8, pub blas_address: u64, } diff --git a/wgpu-hal/src/metal/adapter.rs b/wgpu-hal/src/metal/adapter.rs index ecff2b7a6a..dae4cb3322 100644 --- a/wgpu-hal/src/metal/adapter.rs +++ b/wgpu-hal/src/metal/adapter.rs @@ -377,12 +377,12 @@ impl crate::Adapter for super::Adapter { ], current_extent, - usage: crate::TextureUses::COLOR_TARGET - | crate::TextureUses::COPY_SRC - | crate::TextureUses::COPY_DST - | crate::TextureUses::STORAGE_READ_ONLY - | crate::TextureUses::STORAGE_WRITE_ONLY - | crate::TextureUses::STORAGE_READ_WRITE, + usage: wgt::TextureUses::COLOR_TARGET + | wgt::TextureUses::COPY_SRC + | wgt::TextureUses::COPY_DST + | wgt::TextureUses::STORAGE_READ_ONLY + | wgt::TextureUses::STORAGE_WRITE_ONLY + | wgt::TextureUses::STORAGE_READ_WRITE, }) } diff --git a/wgpu-hal/src/metal/command.rs b/wgpu-hal/src/metal/command.rs index a66349cbf4..c3f2c8cc59 100644 --- a/wgpu-hal/src/metal/command.rs +++ b/wgpu-hal/src/metal/command.rs @@ -279,7 +279,7 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe fn copy_texture_to_texture( &mut self, src: &super::Texture, - _src_usage: crate::TextureUses, + _src_usage: wgt::TextureUses, dst: &super::Texture, regions: T, ) where @@ -358,7 +358,7 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe fn copy_texture_to_buffer( &mut self, src: &super::Texture, - _src_usage: crate::TextureUses, + _src_usage: wgt::TextureUses, dst: &super::Buffer, regions: T, ) where diff --git a/wgpu-hal/src/metal/conv.rs b/wgpu-hal/src/metal/conv.rs index fecd3ffa09..350f4cbb6b 100644 --- a/wgpu-hal/src/metal/conv.rs +++ b/wgpu-hal/src/metal/conv.rs @@ -1,8 +1,8 @@ pub fn map_texture_usage( format: wgt::TextureFormat, - usage: crate::TextureUses, + usage: wgt::TextureUses, ) -> metal::MTLTextureUsage { - use crate::TextureUses as Tu; + use wgt::TextureUses as Tu; let mut mtl_usage = metal::MTLTextureUsage::Unknown; diff --git a/wgpu-hal/src/metal/device.rs b/wgpu-hal/src/metal/device.rs index 51a2c48752..065e23ab47 100644 --- a/wgpu-hal/src/metal/device.rs +++ b/wgpu-hal/src/metal/device.rs @@ -244,7 +244,7 @@ impl super::Device { } naga::AddressSpace::Uniform | naga::AddressSpace::Storage { .. 
} => { let br = match var.binding { - Some(ref br) => br.clone(), + Some(br) => br, None => continue, }; let storage_access_store = match var.space { @@ -340,8 +340,8 @@ impl crate::Device for super::Device { type A = super::Api; unsafe fn create_buffer(&self, desc: &crate::BufferDescriptor) -> DeviceResult { - let map_read = desc.usage.contains(crate::BufferUses::MAP_READ); - let map_write = desc.usage.contains(crate::BufferUses::MAP_WRITE); + let map_read = desc.usage.contains(wgt::BufferUses::MAP_READ); + let map_write = desc.usage.contains(wgt::BufferUses::MAP_WRITE); let mut options = metal::MTLResourceOptions::empty(); options |= if map_read || map_write { diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs index 6f0d478ef1..8a3ff41bbe 100644 --- a/wgpu-hal/src/metal/mod.rs +++ b/wgpu-hal/src/metal/mod.rs @@ -26,7 +26,6 @@ mod surface; mod time; use std::{ - collections::HashMap, fmt, iter, ops, ptr::NonNull, sync::{atomic, Arc}, @@ -35,7 +34,9 @@ use std::{ use arrayvec::ArrayVec; use bitflags::bitflags; +use hashbrown::HashMap; use metal::foreign_types::ForeignTypeRef as _; +use naga::FastHashMap; use parking_lot::{Mutex, RwLock}; #[derive(Clone, Debug)] @@ -937,9 +938,9 @@ struct CommandState { /// See `device::CompiledShader::sized_bindings` for more details. /// /// [`ResourceBinding`]: naga::ResourceBinding - storage_buffer_length_map: rustc_hash::FxHashMap, + storage_buffer_length_map: FastHashMap, - vertex_buffer_size_map: rustc_hash::FxHashMap, + vertex_buffer_size_map: FastHashMap, work_group_memory_sizes: Vec, push_constants: Vec, diff --git a/wgpu-hal/src/metal/surface.rs b/wgpu-hal/src/metal/surface.rs index b35c73c910..5f4bcaeb81 100644 --- a/wgpu-hal/src/metal/surface.rs +++ b/wgpu-hal/src/metal/surface.rs @@ -289,7 +289,7 @@ impl crate::Surface for super::Surface { *self.extent.write() = config.extent; let render_layer = self.render_layer.lock(); - let framebuffer_only = config.usage == crate::TextureUses::COLOR_TARGET; + let framebuffer_only = config.usage == wgt::TextureUses::COLOR_TARGET; let display_sync = match config.present_mode { wgt::PresentMode::Fifo => true, wgt::PresentMode::Immediate => false, diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs index 16115ab0f9..a271715b78 100644 --- a/wgpu-hal/src/vulkan/adapter.rs +++ b/wgpu-hal/src/vulkan/adapter.rs @@ -2012,6 +2012,7 @@ impl super::Adapter { device: Arc::clone(&shared), family_index, relay_semaphores: Mutex::new(relay_semaphores), + signal_semaphores: Mutex::new((Vec::new(), Vec::new())), }; let mem_allocator = { diff --git a/wgpu-hal/src/vulkan/command.rs b/wgpu-hal/src/vulkan/command.rs index 8c6c5281fe..8e5f243ee5 100644 --- a/wgpu-hal/src/vulkan/command.rs +++ b/wgpu-hal/src/vulkan/command.rs @@ -285,7 +285,7 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe fn copy_texture_to_texture( &mut self, src: &super::Texture, - src_usage: crate::TextureUses, + src_usage: wgt::TextureUses, dst: &super::Texture, regions: T, ) where @@ -345,7 +345,7 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe fn copy_texture_to_buffer( &mut self, src: &super::Texture, - src_usage: crate::TextureUses, + src_usage: wgt::TextureUses, dst: &super::Buffer, regions: T, ) where @@ -1157,7 +1157,7 @@ impl crate::CommandEncoder for super::CommandEncoder { #[test] fn check_dst_image_layout() { assert_eq!( - conv::derive_image_layout(crate::TextureUses::COPY_DST, wgt::TextureFormat::Rgba8Unorm), + conv::derive_image_layout(wgt::TextureUses::COPY_DST, 
wgt::TextureFormat::Rgba8Unorm), DST_IMAGE_LAYOUT ); } diff --git a/wgpu-hal/src/vulkan/conv.rs b/wgpu-hal/src/vulkan/conv.rs index 0a210868a5..13da719b63 100644 --- a/wgpu-hal/src/vulkan/conv.rs +++ b/wgpu-hal/src/vulkan/conv.rs @@ -218,23 +218,18 @@ impl crate::ColorAttachment<'_, super::TextureView> { } } -pub fn derive_image_layout( - usage: crate::TextureUses, - format: wgt::TextureFormat, -) -> vk::ImageLayout { +pub fn derive_image_layout(usage: wgt::TextureUses, format: wgt::TextureFormat) -> vk::ImageLayout { // Note: depth textures are always sampled with RODS layout let is_color = !format.is_depth_stencil_format(); match usage { - crate::TextureUses::UNINITIALIZED => vk::ImageLayout::UNDEFINED, - crate::TextureUses::COPY_SRC => vk::ImageLayout::TRANSFER_SRC_OPTIMAL, - crate::TextureUses::COPY_DST => vk::ImageLayout::TRANSFER_DST_OPTIMAL, - crate::TextureUses::RESOURCE if is_color => vk::ImageLayout::SHADER_READ_ONLY_OPTIMAL, - crate::TextureUses::COLOR_TARGET => vk::ImageLayout::COLOR_ATTACHMENT_OPTIMAL, - crate::TextureUses::DEPTH_STENCIL_WRITE => { - vk::ImageLayout::DEPTH_STENCIL_ATTACHMENT_OPTIMAL - } + wgt::TextureUses::UNINITIALIZED => vk::ImageLayout::UNDEFINED, + wgt::TextureUses::COPY_SRC => vk::ImageLayout::TRANSFER_SRC_OPTIMAL, + wgt::TextureUses::COPY_DST => vk::ImageLayout::TRANSFER_DST_OPTIMAL, + wgt::TextureUses::RESOURCE if is_color => vk::ImageLayout::SHADER_READ_ONLY_OPTIMAL, + wgt::TextureUses::COLOR_TARGET => vk::ImageLayout::COLOR_ATTACHMENT_OPTIMAL, + wgt::TextureUses::DEPTH_STENCIL_WRITE => vk::ImageLayout::DEPTH_STENCIL_ATTACHMENT_OPTIMAL, _ => { - if usage == crate::TextureUses::PRESENT { + if usage == wgt::TextureUses::PRESENT { vk::ImageLayout::PRESENT_SRC_KHR } else if is_color { vk::ImageLayout::GENERAL @@ -245,30 +240,30 @@ pub fn derive_image_layout( } } -pub fn map_texture_usage(usage: crate::TextureUses) -> vk::ImageUsageFlags { +pub fn map_texture_usage(usage: wgt::TextureUses) -> vk::ImageUsageFlags { let mut flags = vk::ImageUsageFlags::empty(); - if usage.contains(crate::TextureUses::COPY_SRC) { + if usage.contains(wgt::TextureUses::COPY_SRC) { flags |= vk::ImageUsageFlags::TRANSFER_SRC; } - if usage.contains(crate::TextureUses::COPY_DST) { + if usage.contains(wgt::TextureUses::COPY_DST) { flags |= vk::ImageUsageFlags::TRANSFER_DST; } - if usage.contains(crate::TextureUses::RESOURCE) { + if usage.contains(wgt::TextureUses::RESOURCE) { flags |= vk::ImageUsageFlags::SAMPLED; } - if usage.contains(crate::TextureUses::COLOR_TARGET) { + if usage.contains(wgt::TextureUses::COLOR_TARGET) { flags |= vk::ImageUsageFlags::COLOR_ATTACHMENT; } - if usage.intersects( - crate::TextureUses::DEPTH_STENCIL_READ | crate::TextureUses::DEPTH_STENCIL_WRITE, - ) { + if usage + .intersects(wgt::TextureUses::DEPTH_STENCIL_READ | wgt::TextureUses::DEPTH_STENCIL_WRITE) + { flags |= vk::ImageUsageFlags::DEPTH_STENCIL_ATTACHMENT; } if usage.intersects( - crate::TextureUses::STORAGE_READ_ONLY - | crate::TextureUses::STORAGE_WRITE_ONLY - | crate::TextureUses::STORAGE_READ_WRITE - | crate::TextureUses::STORAGE_ATOMIC, + wgt::TextureUses::STORAGE_READ_ONLY + | wgt::TextureUses::STORAGE_WRITE_ONLY + | wgt::TextureUses::STORAGE_READ_WRITE + | wgt::TextureUses::STORAGE_ATOMIC, ) { flags |= vk::ImageUsageFlags::STORAGE; } @@ -276,7 +271,7 @@ pub fn map_texture_usage(usage: crate::TextureUses) -> vk::ImageUsageFlags { } pub fn map_texture_usage_to_barrier( - usage: crate::TextureUses, + usage: wgt::TextureUses, ) -> (vk::PipelineStageFlags, vk::AccessFlags) { let mut stages = 
vk::PipelineStageFlags::empty(); let mut access = vk::AccessFlags::empty(); @@ -284,51 +279,51 @@ pub fn map_texture_usage_to_barrier( | vk::PipelineStageFlags::FRAGMENT_SHADER | vk::PipelineStageFlags::COMPUTE_SHADER; - if usage.contains(crate::TextureUses::COPY_SRC) { + if usage.contains(wgt::TextureUses::COPY_SRC) { stages |= vk::PipelineStageFlags::TRANSFER; access |= vk::AccessFlags::TRANSFER_READ; } - if usage.contains(crate::TextureUses::COPY_DST) { + if usage.contains(wgt::TextureUses::COPY_DST) { stages |= vk::PipelineStageFlags::TRANSFER; access |= vk::AccessFlags::TRANSFER_WRITE; } - if usage.contains(crate::TextureUses::RESOURCE) { + if usage.contains(wgt::TextureUses::RESOURCE) { stages |= shader_stages; access |= vk::AccessFlags::SHADER_READ; } - if usage.contains(crate::TextureUses::COLOR_TARGET) { + if usage.contains(wgt::TextureUses::COLOR_TARGET) { stages |= vk::PipelineStageFlags::COLOR_ATTACHMENT_OUTPUT; access |= vk::AccessFlags::COLOR_ATTACHMENT_READ | vk::AccessFlags::COLOR_ATTACHMENT_WRITE; } - if usage.intersects(crate::TextureUses::DEPTH_STENCIL_READ) { + if usage.intersects(wgt::TextureUses::DEPTH_STENCIL_READ) { stages |= vk::PipelineStageFlags::EARLY_FRAGMENT_TESTS | vk::PipelineStageFlags::LATE_FRAGMENT_TESTS; access |= vk::AccessFlags::DEPTH_STENCIL_ATTACHMENT_READ; } - if usage.intersects(crate::TextureUses::DEPTH_STENCIL_WRITE) { + if usage.intersects(wgt::TextureUses::DEPTH_STENCIL_WRITE) { stages |= vk::PipelineStageFlags::EARLY_FRAGMENT_TESTS | vk::PipelineStageFlags::LATE_FRAGMENT_TESTS; access |= vk::AccessFlags::DEPTH_STENCIL_ATTACHMENT_READ | vk::AccessFlags::DEPTH_STENCIL_ATTACHMENT_WRITE; } if usage.intersects( - crate::TextureUses::STORAGE_READ_ONLY - | crate::TextureUses::STORAGE_READ_WRITE - | crate::TextureUses::STORAGE_ATOMIC, + wgt::TextureUses::STORAGE_READ_ONLY + | wgt::TextureUses::STORAGE_READ_WRITE + | wgt::TextureUses::STORAGE_ATOMIC, ) { stages |= shader_stages; access |= vk::AccessFlags::SHADER_READ; } if usage.intersects( - crate::TextureUses::STORAGE_WRITE_ONLY - | crate::TextureUses::STORAGE_READ_WRITE - | crate::TextureUses::STORAGE_ATOMIC, + wgt::TextureUses::STORAGE_WRITE_ONLY + | wgt::TextureUses::STORAGE_READ_WRITE + | wgt::TextureUses::STORAGE_ATOMIC, ) { stages |= shader_stages; access |= vk::AccessFlags::SHADER_WRITE; } - if usage == crate::TextureUses::UNINITIALIZED || usage == crate::TextureUses::PRESENT { + if usage == wgt::TextureUses::UNINITIALIZED || usage == wgt::TextureUses::PRESENT { ( vk::PipelineStageFlags::TOP_OF_PIPE, vk::AccessFlags::empty(), @@ -338,28 +333,28 @@ pub fn map_texture_usage_to_barrier( } } -pub fn map_vk_image_usage(usage: vk::ImageUsageFlags) -> crate::TextureUses { - let mut bits = crate::TextureUses::empty(); +pub fn map_vk_image_usage(usage: vk::ImageUsageFlags) -> wgt::TextureUses { + let mut bits = wgt::TextureUses::empty(); if usage.contains(vk::ImageUsageFlags::TRANSFER_SRC) { - bits |= crate::TextureUses::COPY_SRC; + bits |= wgt::TextureUses::COPY_SRC; } if usage.contains(vk::ImageUsageFlags::TRANSFER_DST) { - bits |= crate::TextureUses::COPY_DST; + bits |= wgt::TextureUses::COPY_DST; } if usage.contains(vk::ImageUsageFlags::SAMPLED) { - bits |= crate::TextureUses::RESOURCE; + bits |= wgt::TextureUses::RESOURCE; } if usage.contains(vk::ImageUsageFlags::COLOR_ATTACHMENT) { - bits |= crate::TextureUses::COLOR_TARGET; + bits |= wgt::TextureUses::COLOR_TARGET; } if usage.contains(vk::ImageUsageFlags::DEPTH_STENCIL_ATTACHMENT) { - bits |= crate::TextureUses::DEPTH_STENCIL_READ | 
crate::TextureUses::DEPTH_STENCIL_WRITE; + bits |= wgt::TextureUses::DEPTH_STENCIL_READ | wgt::TextureUses::DEPTH_STENCIL_WRITE; } if usage.contains(vk::ImageUsageFlags::STORAGE) { - bits |= crate::TextureUses::STORAGE_READ_ONLY - | crate::TextureUses::STORAGE_WRITE_ONLY - | crate::TextureUses::STORAGE_READ_WRITE - | crate::TextureUses::STORAGE_ATOMIC; + bits |= wgt::TextureUses::STORAGE_READ_ONLY + | wgt::TextureUses::STORAGE_WRITE_ONLY + | wgt::TextureUses::STORAGE_READ_WRITE + | wgt::TextureUses::STORAGE_ATOMIC; } bits } @@ -523,37 +518,35 @@ pub fn map_vk_composite_alpha(flags: vk::CompositeAlphaFlagsKHR) -> Vec vk::BufferUsageFlags { +pub fn map_buffer_usage(usage: wgt::BufferUses) -> vk::BufferUsageFlags { let mut flags = vk::BufferUsageFlags::empty(); - if usage.contains(crate::BufferUses::COPY_SRC) { + if usage.contains(wgt::BufferUses::COPY_SRC) { flags |= vk::BufferUsageFlags::TRANSFER_SRC; } - if usage.contains(crate::BufferUses::COPY_DST) { + if usage.contains(wgt::BufferUses::COPY_DST) { flags |= vk::BufferUsageFlags::TRANSFER_DST; } - if usage.contains(crate::BufferUses::UNIFORM) { + if usage.contains(wgt::BufferUses::UNIFORM) { flags |= vk::BufferUsageFlags::UNIFORM_BUFFER; } - if usage - .intersects(crate::BufferUses::STORAGE_READ_ONLY | crate::BufferUses::STORAGE_READ_WRITE) - { + if usage.intersects(wgt::BufferUses::STORAGE_READ_ONLY | wgt::BufferUses::STORAGE_READ_WRITE) { flags |= vk::BufferUsageFlags::STORAGE_BUFFER; } - if usage.contains(crate::BufferUses::INDEX) { + if usage.contains(wgt::BufferUses::INDEX) { flags |= vk::BufferUsageFlags::INDEX_BUFFER; } - if usage.contains(crate::BufferUses::VERTEX) { + if usage.contains(wgt::BufferUses::VERTEX) { flags |= vk::BufferUsageFlags::VERTEX_BUFFER; } - if usage.contains(crate::BufferUses::INDIRECT) { + if usage.contains(wgt::BufferUses::INDIRECT) { flags |= vk::BufferUsageFlags::INDIRECT_BUFFER; } - if usage.contains(crate::BufferUses::ACCELERATION_STRUCTURE_SCRATCH) { + if usage.contains(wgt::BufferUses::ACCELERATION_STRUCTURE_SCRATCH) { flags |= vk::BufferUsageFlags::STORAGE_BUFFER | vk::BufferUsageFlags::SHADER_DEVICE_ADDRESS; } if usage.intersects( - crate::BufferUses::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT - | crate::BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT, + wgt::BufferUses::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT + | wgt::BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT, ) { flags |= vk::BufferUsageFlags::ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_KHR | vk::BufferUsageFlags::SHADER_DEVICE_ADDRESS; @@ -562,7 +555,7 @@ pub fn map_buffer_usage(usage: crate::BufferUses) -> vk::BufferUsageFlags { } pub fn map_buffer_usage_to_barrier( - usage: crate::BufferUses, + usage: wgt::BufferUses, ) -> (vk::PipelineStageFlags, vk::AccessFlags) { let mut stages = vk::PipelineStageFlags::empty(); let mut access = vk::AccessFlags::empty(); @@ -570,50 +563,50 @@ pub fn map_buffer_usage_to_barrier( | vk::PipelineStageFlags::FRAGMENT_SHADER | vk::PipelineStageFlags::COMPUTE_SHADER; - if usage.contains(crate::BufferUses::MAP_READ) { + if usage.contains(wgt::BufferUses::MAP_READ) { stages |= vk::PipelineStageFlags::HOST; access |= vk::AccessFlags::HOST_READ; } - if usage.contains(crate::BufferUses::MAP_WRITE) { + if usage.contains(wgt::BufferUses::MAP_WRITE) { stages |= vk::PipelineStageFlags::HOST; access |= vk::AccessFlags::HOST_WRITE; } - if usage.contains(crate::BufferUses::COPY_SRC) { + if usage.contains(wgt::BufferUses::COPY_SRC) { stages |= vk::PipelineStageFlags::TRANSFER; access |= 
vk::AccessFlags::TRANSFER_READ; } - if usage.contains(crate::BufferUses::COPY_DST) { + if usage.contains(wgt::BufferUses::COPY_DST) { stages |= vk::PipelineStageFlags::TRANSFER; access |= vk::AccessFlags::TRANSFER_WRITE; } - if usage.contains(crate::BufferUses::UNIFORM) { + if usage.contains(wgt::BufferUses::UNIFORM) { stages |= shader_stages; access |= vk::AccessFlags::UNIFORM_READ; } - if usage.intersects(crate::BufferUses::STORAGE_READ_ONLY) { + if usage.intersects(wgt::BufferUses::STORAGE_READ_ONLY) { stages |= shader_stages; access |= vk::AccessFlags::SHADER_READ; } - if usage.intersects(crate::BufferUses::STORAGE_READ_WRITE) { + if usage.intersects(wgt::BufferUses::STORAGE_READ_WRITE) { stages |= shader_stages; access |= vk::AccessFlags::SHADER_READ | vk::AccessFlags::SHADER_WRITE; } - if usage.contains(crate::BufferUses::INDEX) { + if usage.contains(wgt::BufferUses::INDEX) { stages |= vk::PipelineStageFlags::VERTEX_INPUT; access |= vk::AccessFlags::INDEX_READ; } - if usage.contains(crate::BufferUses::VERTEX) { + if usage.contains(wgt::BufferUses::VERTEX) { stages |= vk::PipelineStageFlags::VERTEX_INPUT; access |= vk::AccessFlags::VERTEX_ATTRIBUTE_READ; } - if usage.contains(crate::BufferUses::INDIRECT) { + if usage.contains(wgt::BufferUses::INDIRECT) { stages |= vk::PipelineStageFlags::DRAW_INDIRECT; access |= vk::AccessFlags::INDIRECT_COMMAND_READ; } if usage.intersects( - crate::BufferUses::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT - | crate::BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT - | crate::BufferUses::ACCELERATION_STRUCTURE_SCRATCH, + wgt::BufferUses::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT + | wgt::BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT + | wgt::BufferUses::ACCELERATION_STRUCTURE_SCRATCH, ) { stages |= vk::PipelineStageFlags::ACCELERATION_STRUCTURE_BUILD_KHR; access |= vk::AccessFlags::ACCELERATION_STRUCTURE_READ_KHR diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs index d25aece9bd..16cd42c5e5 100644 --- a/wgpu-hal/src/vulkan/device.rs +++ b/wgpu-hal/src/vulkan/device.rs @@ -2,12 +2,13 @@ use super::{conv, RawTlasInstance}; use arrayvec::ArrayVec; use ash::{khr, vk}; +use hashbrown::hash_map::Entry; use parking_lot::Mutex; use crate::TlasInstance; use std::{ borrow::Cow, - collections::{hash_map::Entry, BTreeMap}, + collections::BTreeMap, ffi::{CStr, CString}, mem::{self, size_of, MaybeUninit}, num::NonZeroU32, @@ -1039,17 +1040,17 @@ impl crate::Device for super::Device { let mut alloc_usage = if desc .usage - .intersects(crate::BufferUses::MAP_READ | crate::BufferUses::MAP_WRITE) + .intersects(wgt::BufferUses::MAP_READ | wgt::BufferUses::MAP_WRITE) { let mut flags = gpu_alloc::UsageFlags::HOST_ACCESS; //TODO: find a way to use `crate::MemoryFlags::PREFER_COHERENT` flags.set( gpu_alloc::UsageFlags::DOWNLOAD, - desc.usage.contains(crate::BufferUses::MAP_READ), + desc.usage.contains(wgt::BufferUses::MAP_READ), ); flags.set( gpu_alloc::UsageFlags::UPLOAD, - desc.usage.contains(crate::BufferUses::MAP_WRITE), + desc.usage.contains(wgt::BufferUses::MAP_WRITE), ); flags } else { @@ -2568,7 +2569,7 @@ impl crate::Device for super::Device { const MAX_U24: u32 = (1u32 << 24u32) - 1u32; let temp = RawTlasInstance { transform: instance.transform, - custom_index_and_mask: (instance.custom_index & MAX_U24) + custom_data_and_mask: (instance.custom_data & MAX_U24) | (u32::from(instance.mask) << 24), shader_binding_table_record_offset_and_flags: 0, acceleration_structure_reference: instance.blas_address, diff --git 
a/wgpu-hal/src/vulkan/instance.rs b/wgpu-hal/src/vulkan/instance.rs index 2c88013b57..d6ca55800d 100644 --- a/wgpu-hal/src/vulkan/instance.rs +++ b/wgpu-hal/src/vulkan/instance.rs @@ -63,6 +63,14 @@ unsafe extern "system" fn debug_utils_messenger_callback( return vk::FALSE; } + // Silence Vulkan Validation error "VUID-vkCmdCopyImageToBuffer-pRegions-00184". + // While we aren't sure yet, we suspect this is probably a VVL issue. + // https://github.com/KhronosGroup/Vulkan-ValidationLayers/issues/9276 + const VUID_VKCMDCOPYIMAGETOBUFFER_PREGIONS_00184: i32 = 0x45ef177c; + if cd.message_id_number == VUID_VKCMDCOPYIMAGETOBUFFER_PREGIONS_00184 { + return vk::FALSE; + } + let level = match message_severity { vk::DebugUtilsMessageSeverityFlagsEXT::VERBOSE => log::Level::Debug, vk::DebugUtilsMessageSeverityFlagsEXT::INFO => log::Level::Info, diff --git a/wgpu-hal/src/vulkan/mod.rs b/wgpu-hal/src/vulkan/mod.rs index 1f75bba215..ed8dfc39d3 100644 --- a/wgpu-hal/src/vulkan/mod.rs +++ b/wgpu-hal/src/vulkan/mod.rs @@ -33,16 +33,19 @@ mod sampler; use std::{ borrow::Borrow, - collections::HashSet, ffi::{CStr, CString}, fmt, mem, num::NonZeroU32, + ops::DerefMut, sync::Arc, }; use arrayvec::ArrayVec; use ash::{ext, khr, vk}; +use hashbrown::HashSet; use parking_lot::{Mutex, RwLock}; + +use naga::FastHashMap; use wgt::InternalCounter; const MILLIS_TO_NANOS: u64 = 1_000_000; @@ -613,7 +616,7 @@ struct FramebufferAttachment { /// Can be NULL if the framebuffer is image-less raw: vk::ImageView, raw_image_flags: vk::ImageCreateFlags, - view_usage: crate::TextureUses, + view_usage: wgt::TextureUses, view_format: wgt::TextureFormat, raw_view_formats: Vec, } @@ -641,8 +644,8 @@ struct DeviceShared { private_caps: PrivateCapabilities, workarounds: Workarounds, features: wgt::Features, - render_passes: Mutex>, - framebuffers: Mutex>, + render_passes: Mutex>, + framebuffers: Mutex>, sampler_cache: Mutex, memory_allocations_counter: InternalCounter, } @@ -761,6 +764,7 @@ pub struct Queue { device: Arc, family_index: u32, relay_semaphores: Mutex, + signal_semaphores: Mutex<(Vec, Vec)>, } impl Drop for Queue { @@ -792,7 +796,7 @@ pub struct Texture { drop_guard: Option, external_memory: Option, block: Option>, - usage: crate::TextureUses, + usage: wgt::TextureUses, format: wgt::TextureFormat, raw_flags: vk::ImageCreateFlags, copy_size: crate::CopyExtent, @@ -1212,6 +1216,15 @@ impl crate::Queue for Queue { signal_values.push(!0); } + let mut guards = self.signal_semaphores.lock(); + let (ref mut pending_signal_semaphores, ref mut pending_signal_semaphore_values) = + guards.deref_mut(); + assert!(pending_signal_semaphores.len() == pending_signal_semaphore_values.len()); + if !pending_signal_semaphores.is_empty() { + signal_semaphores.append(pending_signal_semaphores); + signal_values.append(pending_signal_semaphore_values); + } + // In order for submissions to be strictly ordered, we encode a dependency between each submission // using a pair of semaphores. This adds a wait if it is needed, and signals the next semaphore. 
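The pending signal-semaphore queue drained in the submit path above is fed by the new `Queue::raw_device` and `Queue::add_signal_semaphore` helpers added a little further down in this file. A minimal interop sketch, assuming the caller has already obtained the `wgpu_hal::vulkan::Queue` (for example via wgpu's unsafe `as_hal` hooks); the semaphore creation itself is illustrative and not part of this diff:

```rust
use ash::vk;
use wgpu_hal::vulkan::Queue;

/// Create a binary semaphore on the queue's device and ask wgpu to signal it on the
/// next submission, so external Vulkan code can wait on wgpu's work.
fn register_external_signal(queue: &Queue) -> Result<vk::Semaphore, vk::Result> {
    let device = queue.raw_device();
    let semaphore =
        unsafe { device.create_semaphore(&vk::SemaphoreCreateInfo::default(), None)? };
    // `None` means a binary semaphore; pass `Some(value)` for a timeline semaphore.
    queue.add_signal_semaphore(semaphore, None);
    Ok(semaphore)
}
```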
let semaphore_state = self.relay_semaphores.lock().advance(&self.device)?; @@ -1340,6 +1353,19 @@ impl crate::Queue for Queue { } } +impl Queue { + pub fn raw_device(&self) -> &ash::Device { + &self.device.raw + } + + pub fn add_signal_semaphore(&self, semaphore: vk::Semaphore, semaphore_value: Option) { + let mut guards = self.signal_semaphores.lock(); + let (ref mut semaphores, ref mut semaphore_values) = guards.deref_mut(); + semaphores.push(semaphore); + semaphore_values.push(semaphore_value.unwrap_or(!0)); + } +} + /// Maps /// /// - VK_ERROR_OUT_OF_HOST_MEMORY @@ -1454,7 +1480,7 @@ fn get_lost_err() -> crate::DeviceError { #[repr(C)] struct RawTlasInstance { transform: [f32; 12], - custom_index_and_mask: u32, + custom_data_and_mask: u32, shader_binding_table_record_offset_and_flags: u32, acceleration_structure_reference: u64, } diff --git a/wgpu-hal/src/vulkan/sampler.rs b/wgpu-hal/src/vulkan/sampler.rs index 11030226f0..5fd32c33d1 100644 --- a/wgpu-hal/src/vulkan/sampler.rs +++ b/wgpu-hal/src/vulkan/sampler.rs @@ -2,9 +2,8 @@ //! //! Nearly identical to the DX12 sampler cache, without descriptor heap management. -use std::collections::{hash_map::Entry, HashMap}; - use ash::vk; +use hashbrown::{hash_map::Entry, HashMap}; use ordered_float::OrderedFloat; /// If the allowed sampler count is above this value, the sampler cache is disabled. diff --git a/wgpu-info/Cargo.toml b/wgpu-info/Cargo.toml index 8d05139ad0..56ddf860bf 100644 --- a/wgpu-info/Cargo.toml +++ b/wgpu-info/Cargo.toml @@ -8,12 +8,17 @@ homepage.workspace = true repository.workspace = true keywords.workspace = true license.workspace = true +rust-version.workspace = true [dependencies] anyhow.workspace = true bitflags.workspace = true env_logger.workspace = true +hashbrown = { workspace = true, features = ["serde"] } pico-args.workspace = true serde = { workspace = true, features = ["default"] } serde_json.workspace = true wgpu.workspace = true + +[lints.clippy] +disallowed_types = "allow" diff --git a/wgpu-info/src/report.rs b/wgpu-info/src/report.rs index 974885d2ba..954908b09e 100644 --- a/wgpu-info/src/report.rs +++ b/wgpu-info/src/report.rs @@ -1,5 +1,6 @@ -use std::{collections::HashMap, io}; +use std::io; +use hashbrown::HashMap; use serde::{Deserialize, Serialize}; use wgpu::{ AdapterInfo, DownlevelCapabilities, Features, Limits, TextureFormat, TextureFormatFeatures, diff --git a/wgpu-macros/Cargo.toml b/wgpu-macros/Cargo.toml index 3c605e6554..97c4f31396 100644 --- a/wgpu-macros/Cargo.toml +++ b/wgpu-macros/Cargo.toml @@ -8,6 +8,7 @@ homepage.workspace = true repository.workspace = true keywords.workspace = true license.workspace = true +rust-version.workspace = true exclude = ["Cargo.lock"] publish = false diff --git a/wgpu-types/Cargo.toml b/wgpu-types/Cargo.toml index e9aa95c8d1..653f988b7b 100644 --- a/wgpu-types/Cargo.toml +++ b/wgpu-types/Cargo.toml @@ -25,6 +25,14 @@ targets = [ "wasm32-unknown-unknown", ] +[lints.rust] +unexpected_cfgs = { level = "warn", check-cfg = ['cfg(web_sys_unstable_apis)'] } + +[lints.clippy] +std_instead_of_core = "warn" +std_instead_of_alloc = "warn" +alloc_instead_of_core = "warn" + [lib] [features] @@ -36,9 +44,6 @@ serde = ["dep:serde"] # Enables some internal instrumentation for debugging purposes. 
counters = [] -[lints.rust] -unexpected_cfgs = { level = "warn", check-cfg = ['cfg(web_sys_unstable_apis)'] } - [dependencies] bitflags = { workspace = true, features = ["serde"] } log.workspace = true @@ -62,8 +67,3 @@ web-sys = { workspace = true, default-features = false, features = [ [dev-dependencies] serde = { workspace = true, features = ["derive"] } serde_json.workspace = true - -[lints.clippy] -std_instead_of_core = "warn" -std_instead_of_alloc = "warn" -alloc_instead_of_core = "warn" diff --git a/wgpu-types/src/instance.rs b/wgpu-types/src/instance.rs index a48aabbd4a..7e417a723b 100644 --- a/wgpu-types/src/instance.rs +++ b/wgpu-types/src/instance.rs @@ -216,6 +216,8 @@ impl BackendOptions { pub struct GlBackendOptions { /// Which OpenGL ES 3 minor version to request, if using OpenGL ES. pub gles_minor_version: Gles3MinorVersion, + /// Behavior of OpenGL fences. Affects how `on_completed_work_done` and `device.poll` behave. + pub short_circuit_fences: GlFenceBehavior, } impl GlBackendOptions { @@ -225,7 +227,10 @@ impl GlBackendOptions { #[must_use] pub fn from_env_or_default() -> Self { let gles_minor_version = Gles3MinorVersion::from_env().unwrap_or_default(); - Self { gles_minor_version } + Self { + gles_minor_version, + short_circuit_fences: GlFenceBehavior::Normal, + } } /// Takes the given options, modifies them based on the environment variables, and returns the result. @@ -234,7 +239,11 @@ impl GlBackendOptions { #[must_use] pub fn with_env(self) -> Self { let gles_minor_version = self.gles_minor_version.with_env(); - Self { gles_minor_version } + let short_circuit_fences = self.short_circuit_fences.with_env(); + Self { + gles_minor_version, + short_circuit_fences, + } } } @@ -300,6 +309,14 @@ pub enum Dx12Compiler { } impl Dx12Compiler { + /// Helper function to construct a `DynamicDxc` variant with default paths. + pub fn default_dynamic_dxc() -> Self { + Self::DynamicDxc { + dxc_path: String::from("dxcompiler.dll"), + dxil_path: String::from("dxil.dll"), + } + } + /// Choose which DX12 shader compiler to use from the environment variable `WGPU_DX12_COMPILER`. /// /// Valid values, case insensitive: @@ -312,10 +329,7 @@ impl Dx12Compiler { .as_deref()? .to_lowercase(); match value.as_str() { - "dxc" | "dynamicdxc" => Some(Self::DynamicDxc { - dxc_path: String::from("dxcompiler.dll"), - dxil_path: String::from("dxil.dll"), - }), + "dxc" | "dynamicdxc" => Some(Self::default_dynamic_dxc()), "staticdxc" => Some(Self::StaticDxc), "fxc" => Some(Self::Fxc), _ => None, @@ -386,3 +400,65 @@ impl Gles3MinorVersion { } } } + +/// Dictate the behavior of fences in OpenGL. +#[derive(Clone, Copy, Debug, PartialEq, Eq, Default)] +pub enum GlFenceBehavior { + /// Fences in OpenGL behave normally. If you don't know what to pick, this is what you want. + #[default] + Normal, + /// Fences in OpenGL are short-circuited to always return `true` immediately. + /// + /// This solves a very specific issue that arose due to a bug in wgpu-core that made + /// many WebGL programs work when they "shouldn't" have. If you have code that is trying + /// to call `device.poll(wgpu::Maintain::Wait)` on WebGL, you need to enable this option + /// for the "Wait" to behave how you would expect. + /// + /// Previously all `poll(Wait)` acted like the OpenGL fences were signalled even if they weren't. + /// See for more information. + /// + /// When this is set `Queue::on_completed_work_done` will always return the next time the device + /// is maintained, not when the work is actually done on the GPU. 
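A small sketch of constructing the new option (the `Gles3MinorVersion` value is illustrative; the run-time override via `WGPU_GL_FENCE_BEHAVIOR` is handled by the `with_env` helper defined further down):

```rust
use wgpu_types::{GlBackendOptions, GlFenceBehavior, Gles3MinorVersion};

// Opt into the WebGL-compatibility behavior described above; `Normal` remains the default.
let gl_options = GlBackendOptions {
    gles_minor_version: Gles3MinorVersion::Automatic,
    short_circuit_fences: GlFenceBehavior::AutoFinish,
};
assert!(gl_options.short_circuit_fences.is_auto_finish());

// Environment variable `WGPU_GL_FENCE_BEHAVIOR=normal` (or `autofinish`) can override it.
let effective = gl_options.short_circuit_fences.with_env();
```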
+ AutoFinish, +} + +impl GlFenceBehavior { + /// Returns true if the fence behavior is `AutoFinish`. + pub fn is_auto_finish(&self) -> bool { + matches!(self, Self::AutoFinish) + } + + /// Returns true if the fence behavior is `Normal`. + pub fn is_normal(&self) -> bool { + matches!(self, Self::Normal) + } + + /// Choose which minor OpenGL ES version to use from the environment variable `WGPU_GL_FENCE_BEHAVIOR`. + /// + /// Possible values are `Normal` or `AutoFinish`. Case insensitive. + /// + /// Use with `unwrap_or_default()` to get the default value if the environment variable is not set. + #[must_use] + pub fn from_env() -> Option { + let value = crate::env::var("WGPU_GL_FENCE_BEHAVIOR") + .as_deref()? + .to_lowercase(); + match value.as_str() { + "normal" => Some(Self::Normal), + "autofinish" => Some(Self::AutoFinish), + _ => None, + } + } + + /// Takes the given compiler, modifies it based on the `WGPU_GL_FENCE_BEHAVIOR` environment variable, and returns the result. + /// + /// See `from_env` for more information. + #[must_use] + pub fn with_env(self) -> Self { + if let Some(fence) = Self::from_env() { + fence + } else { + self + } + } +} diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs index b2e5a7cbd9..feb107bcd6 100644 --- a/wgpu-types/src/lib.rs +++ b/wgpu-types/src/lib.rs @@ -5531,6 +5531,60 @@ bitflags::bitflags! { } } +bitflags::bitflags! { + /// Similar to `BufferUsages`, but used only for `CommandEncoder::transition_resources`. + #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] + pub struct BufferUses: u16 { + /// The argument to a read-only mapping. + const MAP_READ = 1 << 0; + /// The argument to a write-only mapping. + const MAP_WRITE = 1 << 1; + /// The source of a hardware copy. + const COPY_SRC = 1 << 2; + /// The destination of a hardware copy. + const COPY_DST = 1 << 3; + /// The index buffer used for drawing. + const INDEX = 1 << 4; + /// A vertex buffer used for drawing. + const VERTEX = 1 << 5; + /// A uniform buffer bound in a bind group. + const UNIFORM = 1 << 6; + /// A read-only storage buffer used in a bind group. + const STORAGE_READ_ONLY = 1 << 7; + /// A read-write buffer used in a bind group. + const STORAGE_READ_WRITE = 1 << 8; + /// The indirect or count buffer in a indirect draw or dispatch. + const INDIRECT = 1 << 9; + /// A buffer used to store query results. + const QUERY_RESOLVE = 1 << 10; + /// Buffer used for acceleration structure building. + const ACCELERATION_STRUCTURE_SCRATCH = 1 << 11; + /// Buffer used for bottom level acceleration structure building. + const BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT = 1 << 12; + /// Buffer used for top level acceleration structure building. + const TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT = 1 << 13; + /// The combination of states that a buffer may be in _at the same time_. + const INCLUSIVE = Self::MAP_READ.bits() | Self::COPY_SRC.bits() | + Self::INDEX.bits() | Self::VERTEX.bits() | Self::UNIFORM.bits() | + Self::STORAGE_READ_ONLY.bits() | Self::INDIRECT.bits() | Self::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT.bits() | Self::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT.bits(); + /// The combination of states that a buffer must exclusively be in. + const EXCLUSIVE = Self::MAP_WRITE.bits() | Self::COPY_DST.bits() | Self::STORAGE_READ_WRITE.bits() | Self::ACCELERATION_STRUCTURE_SCRATCH.bits(); + /// The combination of all usages that the are guaranteed to be be ordered by the hardware. 
+ /// If a usage is ordered, then if the buffer state doesn't change between draw calls, there + /// are no barriers needed for synchronization. + const ORDERED = Self::INCLUSIVE.bits() | Self::MAP_WRITE.bits(); + } +} + +/// A buffer transition for use with `CommandEncoder::transition_resources`. +#[derive(Debug)] +pub struct BufferTransition { + /// The buffer to transition. + pub buffer: T, + /// The new state to transition to. + pub state: BufferUses, +} + /// Describes a [`Buffer`](../wgpu/struct.Buffer.html). /// /// Corresponds to [WebGPU `GPUBufferDescriptor`]( @@ -5745,6 +5799,73 @@ bitflags::bitflags! { } } +bitflags::bitflags! { + /// Similar to `TextureUsages`, but used only for `CommandEncoder::transition_resources`. + #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] + pub struct TextureUses: u16 { + /// The texture is in unknown state. + const UNINITIALIZED = 1 << 0; + /// Ready to present image to the surface. + const PRESENT = 1 << 1; + /// The source of a hardware copy. + const COPY_SRC = 1 << 2; + /// The destination of a hardware copy. + const COPY_DST = 1 << 3; + /// Read-only sampled or fetched resource. + const RESOURCE = 1 << 4; + /// The color target of a renderpass. + const COLOR_TARGET = 1 << 5; + /// Read-only depth stencil usage. + const DEPTH_STENCIL_READ = 1 << 6; + /// Read-write depth stencil usage + const DEPTH_STENCIL_WRITE = 1 << 7; + /// Read-only storage texture usage. Corresponds to a UAV in d3d, so is exclusive, despite being read only. + const STORAGE_READ_ONLY = 1 << 8; + /// Write-only storage texture usage. + const STORAGE_WRITE_ONLY = 1 << 9; + /// Read-write storage texture usage. + const STORAGE_READ_WRITE = 1 << 10; + /// Image atomic enabled storage. + const STORAGE_ATOMIC = 1 << 11; + /// The combination of states that a texture may be in _at the same time_. + const INCLUSIVE = Self::COPY_SRC.bits() | Self::RESOURCE.bits() | Self::DEPTH_STENCIL_READ.bits(); + /// The combination of states that a texture must exclusively be in. + const EXCLUSIVE = Self::COPY_DST.bits() | Self::COLOR_TARGET.bits() | Self::DEPTH_STENCIL_WRITE.bits() | Self::STORAGE_READ_ONLY.bits() | Self::STORAGE_WRITE_ONLY.bits() | Self::STORAGE_READ_WRITE.bits() | Self::STORAGE_ATOMIC.bits() | Self::PRESENT.bits(); + /// The combination of all usages that the are guaranteed to be be ordered by the hardware. + /// If a usage is ordered, then if the texture state doesn't change between draw calls, there + /// are no barriers needed for synchronization. + const ORDERED = Self::INCLUSIVE.bits() | Self::COLOR_TARGET.bits() | Self::DEPTH_STENCIL_WRITE.bits() | Self::STORAGE_READ_ONLY.bits(); + + /// Flag used by the wgpu-core texture tracker to say a texture is in different states for every sub-resource + const COMPLEX = 1 << 12; + /// Flag used by the wgpu-core texture tracker to say that the tracker does not know the state of the sub-resource. + /// This is different from UNINITIALIZED as that says the tracker does know, but the texture has not been initialized. + const UNKNOWN = 1 << 13; + } +} + +/// A texture transition for use with `CommandEncoder::transition_resources`. +#[derive(Debug)] +pub struct TextureTransition { + /// The texture to transition. + pub texture: T, + /// An optional selector to transition only part of the texture. + /// + /// If None, the entire texture will be transitioned. + pub selector: Option, + /// The new state to transition to. + pub state: TextureUses, +} + +/// Specifies a particular set of subresources in a texture. 
+#[derive(Clone, Debug, PartialEq, Eq)] +pub struct TextureSelector { + /// Range of mips to use. + pub mips: Range, + /// Range of layers to use. + pub layers: Range, +} + /// Defines the capabilities of a given surface and adapter. #[derive(Debug)] pub struct SurfaceCapabilities { @@ -7134,17 +7255,24 @@ impl BindingType { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub struct BindGroupLayoutEntry { /// Binding index. Must match shader index and be unique inside a BindGroupLayout. A binding - /// of index 1, would be described as `layout(set = 0, binding = 1) uniform` in shaders. + /// of index 1, would be described as `@group(0) @binding(1)` in shaders. pub binding: u32, /// Which shader stages can see this binding. pub visibility: ShaderStages, /// The type of the binding pub ty: BindingType, - /// If this value is Some, indicates this entry is an array. Array size must be 1 or greater. - /// - /// If this value is Some and `ty` is `BindingType::Texture`, [`Features::TEXTURE_BINDING_ARRAY`] must be supported. + /// If the binding is an array of multiple resources. Corresponds to `binding_array` in the shader. + /// + /// When this is `Some` the following validation applies: + /// - Size must be of value 1 or greater. + /// - When `ty == BindingType::Texture`, [`Features::TEXTURE_BINDING_ARRAY`] must be supported. + /// - When `ty == BindingType::Sampler`, [`Features::TEXTURE_BINDING_ARRAY`] must be supported. + /// - When `ty == BindingType::Buffer`, [`Features::BUFFER_BINDING_ARRAY`] must be supported. + /// - When `ty == BindingType::Buffer` and `ty.ty == BufferBindingType::Storage`, [`Features::STORAGE_RESOURCE_BINDING_ARRAY`] must be supported. + /// - When `ty == BindingType::StorageTexture`, [`Features::STORAGE_RESOURCE_BINDING_ARRAY`] must be supported. + /// - When any binding in the group is an array, no `BindingType::Buffer` in the group may have `has_dynamic_offset == true` + /// - When any binding in the group is an array, no `BindingType::Buffer` in the group may have `ty.ty == BufferBindingType::Uniform`. /// - /// If this value is Some and `ty` is any other variant, bind group creation will fail. #[cfg_attr(feature = "serde", serde(default))] pub count: Option, } diff --git a/wgpu/Cargo.toml b/wgpu/Cargo.toml index 8df1295852..a69809c645 100644 --- a/wgpu/Cargo.toml +++ b/wgpu/Cargo.toml @@ -8,6 +8,7 @@ homepage.workspace = true repository.workspace = true keywords.workspace = true license.workspace = true +rust-version.workspace = true readme = "../README.md" exclude = ["Cargo.lock"] @@ -25,7 +26,6 @@ targets = [ # Cargo machete can't check build.rs dependencies. See https://github.com/bnjbvr/cargo-machete/issues/100 ignored = ["cfg_aliases"] - [lib] [features] @@ -38,24 +38,24 @@ default = ["wgsl", "dx12", "metal", "webgpu"] #! See [#3514](https://github.com/gfx-rs/wgpu/issues/3514) for more details. ## Enables the DX12 backend on Windows. -dx12 = ["wgc?/dx12"] +dx12 = ["wgpu-core?/dx12"] ## Enables the Metal backend on macOS & iOS. -metal = ["wgc?/metal"] +metal = ["wgpu-core?/metal"] ## Enables the WebGPU backend on Wasm. Disabled when targeting `emscripten`. webgpu = ["naga?/wgsl-out"] ## Enables the GLES backend via [ANGLE](https://github.com/google/angle) on macOS using. -angle = ["wgc?/gles"] +angle = ["wgpu-core?/gles"] ## Enables the Vulkan backend on macOS & iOS. 
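The expanded `count` documentation for `BindGroupLayoutEntry` above is easier to follow with a concrete entry; a sketch assuming `Features::TEXTURE_BINDING_ARRAY` is enabled (binding index and array size are illustrative):

```rust
use std::num::NonZeroU32;
use wgpu::{
    BindGroupLayoutEntry, BindingType, ShaderStages, TextureSampleType, TextureViewDimension,
};

// Corresponds to e.g. `@group(0) @binding(0) var textures: binding_array<texture_2d<f32>, 16>;` in WGSL.
let entry = BindGroupLayoutEntry {
    binding: 0,
    visibility: ShaderStages::FRAGMENT,
    ty: BindingType::Texture {
        sample_type: TextureSampleType::Float { filterable: true },
        view_dimension: TextureViewDimension::D2,
        multisampled: false,
    },
    count: NonZeroU32::new(16),
};
```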
-vulkan-portability = ["wgc?/vulkan"] +vulkan-portability = ["wgpu-core?/vulkan"] ## Enables the GLES backend on Wasm ## ## * ⚠️ WIP: Currently will also enable GLES dependencies on any other targets. -webgl = ["dep:hal", "wgc/gles"] +webgl = ["dep:wgpu-hal", "wgpu-core/gles"] #! **Note:** In the documentation, if you see that an item depends on a backend, #! it means that the item is only available when that backend is enabled _and_ the backend @@ -65,13 +65,13 @@ webgl = ["dep:hal", "wgc/gles"] # -------------------------------------------------------------------- ## Enable accepting SPIR-V shaders as input. -spirv = ["naga/spv-in", "wgc/spirv"] +spirv = ["naga/spv-in", "wgpu-core?/spirv"] ## Enable accepting GLSL shaders as input. -glsl = ["naga/glsl-in", "wgc/glsl"] +glsl = ["naga/glsl-in", "wgpu-core?/glsl"] ## Enable accepting WGSL shaders as input. -wgsl = ["wgc?/wgsl"] +wgsl = ["wgpu-core?/wgsl"] ## Enable accepting naga IR shaders as input. naga-ir = ["dep:naga"] @@ -82,18 +82,18 @@ naga-ir = ["dep:naga"] ## Apply run-time checks, even in release builds. These are in addition ## to the validation carried out at public APIs in all builds. -strict_asserts = ["wgc?/strict_asserts", "wgt/strict_asserts"] +strict_asserts = ["wgpu-core?/strict_asserts", "wgpu-types/strict_asserts"] ## Enables serialization via `serde` on common wgpu types. -serde = ["dep:serde", "wgc/serde"] +serde = ["dep:serde", "wgpu-core?/serde"] # Uncomment once we get to https://github.com/gfx-rs/wgpu/issues/5974 # ## Allow writing of trace capture files. See [`Adapter::request_device`]. -# trace = ["serde", "wgc/trace"] +# trace = ["serde", "wgpu-core/trace"] ## Allow deserializing of trace capture files that were written with the `trace` feature. ## To replay a trace file use the [wgpu player](https://github.com/gfx-rs/wgpu/tree/trunk/player). -replay = ["serde", "wgc/replay"] +replay = ["serde", "wgpu-core?/replay"] #! ### Other # -------------------------------------------------------------------- @@ -101,7 +101,7 @@ replay = ["serde", "wgc/replay"] ## Internally count resources and events for debugging purposes. If the counters ## feature is disabled, the counting infrastructure is removed from the build and ## the exposed counters always return 0. -counters = ["wgc/counters"] +counters = ["wgpu-core?/counters"] ## Implement `Send` and `Sync` on Wasm, but only if atomics are not enabled. ## @@ -112,9 +112,9 @@ counters = ["wgc/counters"] ## but on a wasm binary compiled without atomics we know we are definitely ## not in a multithreaded environment. fragile-send-sync-non-atomic-wasm = [ - "hal/fragile-send-sync-non-atomic-wasm", - "wgc/fragile-send-sync-non-atomic-wasm", - "wgt/fragile-send-sync-non-atomic-wasm", + "wgpu-hal?/fragile-send-sync-non-atomic-wasm", + "wgpu-core?/fragile-send-sync-non-atomic-wasm", + "wgpu-types/fragile-send-sync-non-atomic-wasm", ] @@ -127,68 +127,21 @@ fragile-send-sync-non-atomic-wasm = [ ## must be shipped alongside `dxcompiler.dll` and `dxil.dll` (which can be downloaded from Microsoft's GitHub). ## This feature statically links a version of DXC so that no external binaries are required ## to compile DX12 shaders. -static-dxc = ["hal/static-dxc"] - -# wgpu-core is always available as an optional dependency, "wgc". -# Whenever wgpu-core is selected, we want raw window handle support. -[dependencies.wgc] -optional = true -workspace = true -features = ["raw-window-handle"] - -# wgpu-core is required whenever not targeting web APIs directly. 
-# Whenever wgpu-core is selected, we want raw window handle support. -[target.'cfg(any(not(target_arch = "wasm32"), target_os = "emscripten"))'.dependencies.wgc] -workspace = true -features = ["raw-window-handle"] - -# If we are not targeting WebGL, enable indirect-validation. -# WebGL doesn't support indirect execution so this is not needed. -[target.'cfg(not(target_arch = "wasm32"))'.dependencies.wgc] -workspace = true -features = ["indirect-validation"] - -# Enable `wgc` by default on macOS and iOS to allow the `metal` crate feature to -# enable the Metal backend while being no-op on other targets. -[target.'cfg(target_vendor = "apple")'.dependencies.wgc] -workspace = true - -# We want the wgpu-core Direct3D backend and OpenGL (via WGL) on Windows. -[target.'cfg(windows)'.dependencies.wgc] -workspace = true -features = ["gles"] - -# We want the wgpu-core Vulkan backend on Unix (but not emscripten, macOS, iOS) and Windows. -[target.'cfg(any(windows, all(unix, not(target_os = "emscripten"), not(target_vendor = "apple"))))'.dependencies.wgc] -workspace = true -features = ["vulkan"] - -# We want the wgpu-core GLES backend on Unix (but not macOS, iOS). -[target.'cfg(all(unix, not(target_vendor = "apple")))'.dependencies.wgc] -workspace = true -features = ["gles"] - -[dependencies.wgt] -workspace = true - -# We need wgpu-hal unless we're targeting the web APIs. -[target.'cfg(any(not(target_arch = "wasm32"), target_os = "emscripten"))'.dependencies] -hal = { workspace = true } - -[target.'cfg(all(not(target_arch = "wasm32"), unix, not(target_vendor = "apple")))'.dependencies] -hal = { workspace = true, features = ["renderdoc"] } - -[target.'cfg(windows)'.dependencies] -hal = { workspace = true, features = ["renderdoc"] } +static-dxc = ["wgpu-hal?/static-dxc"] -[target.'cfg(target_arch = "wasm32")'.dependencies.hal] -workspace = true -optional = true +######################### +# Standard Dependencies # +######################### [dependencies] +naga = { workspace = true, optional = true } +wgpu-core = { workspace = true, optional = true } +wgpu-types = { workspace = true, features = ["serde"] } + arrayvec.workspace = true bitflags.workspace = true document-features.workspace = true +hashbrown.workspace = true log.workspace = true parking_lot.workspace = true profiling.workspace = true @@ -197,19 +150,44 @@ serde = { workspace = true, features = ["default", "derive"], optional = true } smallvec.workspace = true static_assertions.workspace = true -[dependencies.naga] -workspace = true -optional = true +######################################## +# Target Specific Feature Dependencies # +######################################## -[build-dependencies] -cfg_aliases.workspace = true +# Windows +[target.'cfg(windows)'.dependencies] +wgpu-core = { workspace = true, features = [ + "raw-window-handle", + "vulkan", + "gles", +] } +wgpu-hal = { workspace = true, features = ["renderdoc"] } + +# Apple Platforms +[target.'cfg(target_vendor = "apple")'.dependencies] +wgpu-core = { workspace = true, features = ["raw-window-handle"] } +wgpu-hal = { workspace = true, features = [] } + +# Linux + Android +[target.'cfg(any(target_os = "linux", target_os = "android"))'.dependencies] +wgpu-core = { workspace = true, features = [ + "raw-window-handle", + "vulkan", + "gles", +] } +wgpu-hal = { workspace = true, features = ["renderdoc"] } -# used to test all the example shaders -[dev-dependencies.naga] -workspace = true -features = ["wgsl-in"] +# Webassembly +[target.'cfg(all(target_arch = "wasm32", not(target_os = 
"emscripten")))'.dependencies] +wgpu-core = { workspace = true, optional = true, features = [ + "raw-window-handle", +] } +wgpu-hal = { workspace = true, optional = true } -[target.'cfg(target_arch = "wasm32")'.dependencies] +js-sys = { workspace = true, features = ["default"] } +parking_lot.workspace = true +wasm-bindgen-futures.workspace = true +wasm-bindgen.workspace = true web-sys = { workspace = true, features = [ "default", "Document", @@ -227,7 +205,11 @@ web-sys = { workspace = true, features = [ "Event", "EventTarget", ] } -wasm-bindgen.workspace = true -js-sys = { workspace = true, features = ["default"] } -wasm-bindgen-futures.workspace = true -parking_lot.workspace = true + +# Emscripten +[target.'cfg(target_os = "emscripten")'.dependencies] +wgpu-core = { workspace = true, features = ["raw-window-handle", "gles"] } +wgpu-hal = { workspace = true } + +[build-dependencies] +cfg_aliases.workspace = true diff --git a/wgpu/src/api/blas.rs b/wgpu/src/api/blas.rs index 8f681f17d5..ed7a14e331 100644 --- a/wgpu/src/api/blas.rs +++ b/wgpu/src/api/blas.rs @@ -49,7 +49,7 @@ pub struct TlasInstance { /// /// This must only use the lower 24 bits, if any bits are outside that range (byte 4 does not equal 0) the TlasInstance becomes /// invalid and generates a validation error when built - pub custom_index: u32, + pub custom_data: u32, /// Mask for the instance used inside the shader to filter instances. /// Reports hit only if `(shader_cull_mask & tlas_instance.mask) != 0u`. pub mask: u8, @@ -59,7 +59,7 @@ impl TlasInstance { /// Construct TlasInstance. /// - blas: Reference to the bottom level acceleration structure /// - transform: Transform buffer offset in bytes (optional, required if transform buffer is present) - /// - custom_index: Custom index for the instance used inside the shader (max 24 bits) + /// - custom_data: Custom index for the instance used inside the shader (max 24 bits) /// - mask: Mask for the instance used inside the shader to filter instances /// /// Note: while one of these contains a reference to a BLAS that BLAS will not be dropped, @@ -67,11 +67,11 @@ impl TlasInstance { /// TlasInstance(s) will immediately make them invalid. If one or more of those invalid /// TlasInstances is inside a TlasPackage that is attempted to be built, the build will /// generate a validation error. - pub fn new(blas: &Blas, transform: [f32; 12], custom_index: u32, mask: u8) -> Self { + pub fn new(blas: &Blas, transform: [f32; 12], custom_data: u32, mask: u8) -> Self { Self { blas: blas.inner.clone(), transform, - custom_index, + custom_data, mask, } } diff --git a/wgpu/src/api/command_buffer.rs b/wgpu/src/api/command_buffer.rs index b582bf1f05..00c84af30d 100644 --- a/wgpu/src/api/command_buffer.rs +++ b/wgpu/src/api/command_buffer.rs @@ -1,7 +1,3 @@ -use std::sync::Arc; - -use parking_lot::Mutex; - use crate::*; /// Handle to a command buffer on the GPU. @@ -11,11 +7,9 @@ use crate::*; /// a [`CommandEncoder`] and then calling [`CommandEncoder::finish`]. /// /// Corresponds to [WebGPU `GPUCommandBuffer`](https://gpuweb.github.io/gpuweb/#command-buffer). 
-#[derive(Debug, Clone)] +#[derive(Debug)] pub struct CommandBuffer { - pub(crate) inner: Arc>>, + pub(crate) buffer: dispatch::DispatchCommandBuffer, } #[cfg(send_sync)] static_assertions::assert_impl_all!(CommandBuffer: Send, Sync); - -crate::cmp::impl_eq_ord_hash_arc_address!(CommandBuffer => .inner); diff --git a/wgpu/src/api/command_encoder.rs b/wgpu/src/api/command_encoder.rs index 08c45f22e3..12cb955bf1 100644 --- a/wgpu/src/api/command_encoder.rs +++ b/wgpu/src/api/command_encoder.rs @@ -1,4 +1,4 @@ -use std::{ops::Range, sync::Arc}; +use std::ops::Range; use crate::{ api::{ @@ -35,7 +35,6 @@ crate::cmp::impl_eq_ord_hash_proxy!(CommandEncoder => .inner); pub type CommandEncoderDescriptor<'a> = wgt::CommandEncoderDescriptor>; static_assertions::assert_impl_all!(CommandEncoderDescriptor<'_>: Send, Sync); -use parking_lot::Mutex; pub use wgt::TexelCopyBufferInfo as TexelCopyBufferInfoBase; /// View of a buffer which can be used to copy to/from a texture. /// @@ -59,9 +58,7 @@ impl CommandEncoder { pub fn finish(mut self) -> CommandBuffer { let buffer = self.inner.finish(); - CommandBuffer { - inner: Arc::new(Mutex::new(Some(buffer))), - } + CommandBuffer { buffer } } /// Begins recording of a render pass. @@ -347,4 +344,69 @@ impl CommandEncoder { &mut tlas.into_iter(), ); } + + /// Transition resources to an underlying hal resource state. + /// + /// This is an advanced, native-only API (no-op on web) that has two main use cases: + /// + /// # Batching Barriers + /// + /// Wgpu does not have a global view of the frame when recording command buffers. When you submit multiple command buffers in a single queue submission, wgpu may need to record and + /// insert new command buffers (holding 1 or more barrier commands) in between the user-supplied command buffers in order to ensure that resources are transitioned to the correct state + /// for the start of the next user-supplied command buffer. + /// + /// Wgpu does not currently attempt to batch multiple of these generated command buffers/barriers together, which may lead to suboptimal barrier placement. 
+ /// + /// Consider the following scenario, where the user does `queue.submit(&[a, b, c])`: + /// * CommandBuffer A: Use resource X as a render pass attachment + /// * CommandBuffer B: Use resource Y as a render pass attachment + /// * CommandBuffer C: Use resources X and Y in a bind group + /// + /// At submission time, wgpu will record and insert some new command buffers, resulting in a submission that looks like `queue.submit(&[0, a, 1, b, 2, c])`: + /// * CommandBuffer 0: Barrier to transition resource X from TextureUses::RESOURCE (from last frame) to TextureUses::COLOR_TARGET + /// * CommandBuffer A: Use resource X as a render pass attachment + /// * CommandBuffer 1: Barrier to transition resource Y from TextureUses::RESOURCE (from last frame) to TextureUses::COLOR_TARGET + /// * CommandBuffer B: Use resource Y as a render pass attachment + /// * CommandBuffer 2: Barrier to transition resources X and Y from TextureUses::COLOR_TARGET to TextureUses::RESOURCE + /// * CommandBuffer C: Use resources X and Y in a bind group + /// + /// To prevent this, after profiling their app, an advanced user might choose to instead do `queue.submit(&[a, b, c])`: + /// * CommandBuffer A: + /// * Use [`CommandEncoder::transition_resources`] to transition resources X and Y from TextureUses::RESOURCE (from last frame) to TextureUses::COLOR_TARGET + /// * Use resource X as a render pass attachment + /// * CommandBuffer B: Use resource Y as a render pass attachment + /// * CommandBuffer C: + /// * Use [`CommandEncoder::transition_resources`] to transition resources X and Y from TextureUses::COLOR_TARGET to TextureUses::RESOURCE + /// * Use resources X and Y in a bind group + /// + /// At submission time, wgpu will record and insert some new command buffers, resulting in a submission that looks like `queue.submit(&[0, a, b, 1, c])`: + /// * CommandBuffer 0: Barrier to transition resources X and Y from TextureUses::RESOURCE (from last frame) to TextureUses::COLOR_TARGET + /// * CommandBuffer A: Use resource X as a render pass attachment + /// * CommandBuffer B: Use resource Y as a render pass attachment + /// * CommandBuffer 1: Barrier to transition resources X and Y from TextureUses::COLOR_TARGET to TextureUses::RESOURCE + /// * CommandBuffer C: Use resources X and Y in a bind group + /// + /// Which eliminates the extra command buffer and barrier between command buffers A and B. + /// + /// # Native Interoperability + /// + /// A user wanting to interoperate with the underlying native graphics APIs (Vulkan, DirectX12, Metal, etc) can use this API to generate barriers between wgpu commands and + /// the native API commands, for synchronization and resource state transition purposes. 
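A sketch of the batching pattern described above, with hypothetical `device`, `texture_x`, and `texture_y` handles (on the web backend this records nothing, as noted):

```rust
use wgpu::{TextureTransition, TextureUses};

// Encoding "CommandBuffer A" from the example: transition both attachments up front
// so wgpu does not need to inject a barrier-only command buffer between A and B.
let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor::default());
encoder.transition_resources(
    std::iter::empty::<wgpu::BufferTransition<&wgpu::Buffer>>(),
    [
        TextureTransition { texture: &texture_x, selector: None, state: TextureUses::COLOR_TARGET },
        TextureTransition { texture: &texture_y, selector: None, state: TextureUses::COLOR_TARGET },
    ]
    .into_iter(),
);
// ... record the render pass that uses `texture_x` as an attachment ...
let command_buffer_a = encoder.finish();
```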
+ pub fn transition_resources<'a>( + &mut self, + buffer_transitions: impl Iterator>, + texture_transitions: impl Iterator>, + ) { + self.inner.transition_resources( + &mut buffer_transitions.map(|t| wgt::BufferTransition { + buffer: &t.buffer.inner, + state: t.state, + }), + &mut texture_transitions.map(|t| wgt::TextureTransition { + texture: &t.texture.inner, + selector: t.selector, + state: t.state, + }), + ); + } } diff --git a/wgpu/src/api/common_pipeline.rs b/wgpu/src/api/common_pipeline.rs index 900c20bd5c..7f07231f9d 100644 --- a/wgpu/src/api/common_pipeline.rs +++ b/wgpu/src/api/common_pipeline.rs @@ -1,4 +1,4 @@ -use std::collections::HashMap; +use hashbrown::HashMap; use crate::*; @@ -40,7 +40,7 @@ impl Default for PipelineCompilationOptions<'_> { /// Describes a pipeline cache, which allows reusing compilation work /// between program runs. /// -/// For use with [`Device::create_pipeline_cache`] +/// For use with [`Device::create_pipeline_cache`]. /// /// This type is unique to the Rust API of `wgpu`. #[derive(Clone, Debug)] diff --git a/wgpu/src/api/pipeline_cache.rs b/wgpu/src/api/pipeline_cache.rs index e3c8d60886..e3b7a4b043 100644 --- a/wgpu/src/api/pipeline_cache.rs +++ b/wgpu/src/api/pipeline_cache.rs @@ -5,7 +5,9 @@ use crate::*; /// in subsequent executions /// /// This reuse is only applicable for the same or similar devices. -/// See [`util::pipeline_cache_key`] for some details. +/// See [`util::pipeline_cache_key`] for some details and a suggested workflow. +/// +/// Created using [`Device::create_pipeline_cache`]. /// /// # Background /// @@ -28,6 +30,7 @@ use crate::*; /// /// # Usage /// +/// This is used as [`RenderPipelineDescriptor::cache`] or [`ComputePipelineDescriptor::cache`]. /// It is valid to use this resource when creating multiple pipelines, in /// which case it will likely cache each of those pipelines. /// It is also valid to create a new cache for each pipeline. diff --git a/wgpu/src/api/queue.rs b/wgpu/src/api/queue.rs index a95a972fff..b3932abfd0 100644 --- a/wgpu/src/api/queue.rs +++ b/wgpu/src/api/queue.rs @@ -211,12 +211,7 @@ impl Queue { &self, command_buffers: I, ) -> SubmissionIndex { - let mut command_buffers = command_buffers.into_iter().map(|comb| { - comb.inner - .lock() - .take() - .expect("Command buffer already submitted") - }); + let mut command_buffers = command_buffers.into_iter().map(|comb| comb.buffer); let index = self.inner.submit(&mut command_buffers); diff --git a/wgpu/src/api/shader_module.rs b/wgpu/src/api/shader_module.rs index e3d2f39b74..2f3e39fc9b 100644 --- a/wgpu/src/api/shader_module.rs +++ b/wgpu/src/api/shader_module.rs @@ -10,7 +10,7 @@ use crate::*; /// of a pipeline. /// /// Corresponds to [WebGPU `GPUShaderModule`](https://gpuweb.github.io/gpuweb/#shader-module). -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct ShaderModule { pub(crate) inner: dispatch::DispatchShaderModule, } diff --git a/wgpu/src/api/surface.rs b/wgpu/src/api/surface.rs index 37978dd43e..66bcdbd9c8 100644 --- a/wgpu/src/api/surface.rs +++ b/wgpu/src/api/surface.rs @@ -23,12 +23,6 @@ static_assertions::assert_impl_all!(SurfaceConfiguration: Send, Sync); /// [`GPUCanvasContext`](https://gpuweb.github.io/gpuweb/#canvas-context) /// serves a similar role. pub struct Surface<'window> { - /// Optionally, keep the source of the handle used for the surface alive. - /// - /// This is useful for platforms where the surface is created from a window and the surface - /// would become invalid when the window is dropped. 
- pub(crate) _handle_source: Option>, - /// Additional surface data returned by [`DynContext::instance_create_surface`]. pub(crate) inner: dispatch::DispatchSurface, @@ -39,6 +33,14 @@ pub struct Surface<'window> { // be wrapped in a mutex and since the configuration is only supplied after the surface has // been created is is additionally wrapped in an option. pub(crate) config: Mutex>, + + /// Optionally, keep the source of the handle used for the surface alive. + /// + /// This is useful for platforms where the surface is created from a window and the surface + /// would become invalid when the window is dropped. + /// + /// SAFETY: This field must be dropped *after* all other fields to ensure proper cleanup. + pub(crate) _handle_source: Option>, } impl Surface<'_> { diff --git a/wgpu/src/api/texture.rs b/wgpu/src/api/texture.rs index 03044bdf15..8ac80e6505 100644 --- a/wgpu/src/api/texture.rs +++ b/wgpu/src/api/texture.rs @@ -37,7 +37,11 @@ impl Texture { } } - /// Creates a view of this texture. + /// Creates a view of this texture, specifying an interpretation of its texels and + /// possibly a subset of its layers and mip levels. + /// + /// Texture views are needed to use a texture as a binding in a [`BindGroup`] + /// or as an attachment in a [`RenderPass`]. pub fn create_view(&self, desc: &TextureViewDescriptor<'_>) -> TextureView { let view = self.inner.create_view(desc); diff --git a/wgpu/src/api/texture_view.rs b/wgpu/src/api/texture_view.rs index 9b3a7d9386..490e0e6eca 100644 --- a/wgpu/src/api/texture_view.rs +++ b/wgpu/src/api/texture_view.rs @@ -2,8 +2,11 @@ use crate::*; /// Handle to a texture view. /// -/// A `TextureView` object describes a texture and associated metadata needed by a -/// [`RenderPipeline`] or [`BindGroup`]. +/// A `TextureView` object refers to a [`Texture`], or a subset of its layers and mip levels, and +/// specifies an interpretation of the texture’s texels, which is needed to use a texture as a +/// binding in a [`BindGroup`] or as an attachment in a [`RenderPass`]. +/// It can be created using [`Texture::create_view()`], which accepts a [`TextureViewDescriptor`] +/// specifying the properties of the view. /// /// Corresponds to [WebGPU `GPUTextureView`](https://gpuweb.github.io/gpuweb/#gputextureview). #[derive(Debug, Clone)] diff --git a/wgpu/src/backend/webgpu.rs b/wgpu/src/backend/webgpu.rs index 1b43f2b058..771814d715 100644 --- a/wgpu/src/backend/webgpu.rs +++ b/wgpu/src/backend/webgpu.rs @@ -5,10 +5,10 @@ mod ext_bindings; #[allow(clippy::allow_attributes)] mod webgpu_sys; +use hashbrown::HashMap; use js_sys::Promise; use std::{ cell::RefCell, - collections::HashMap, fmt, future::Future, ops::Range, @@ -658,7 +658,7 @@ fn map_texture_copy_view( } fn map_tagged_texture_copy_view( - view: wgt::CopyExternalImageDestInfo<&crate::api::Texture>, + view: crate::CopyExternalImageDestInfo<&crate::api::Texture>, ) -> webgpu_sys::GpuCopyExternalImageDestInfo { let texture = view.texture.inner.as_webgpu(); let mapped = webgpu_sys::GpuCopyExternalImageDestInfo::new(&texture.inner); @@ -1368,7 +1368,11 @@ pub struct WebQueueWriteBuffer { #[derive(Debug)] pub struct WebBufferMappedRange { actual_mapping: js_sys::Uint8Array, + /// Copy of the mapped data that lives in the Rust/Wasm heap instead of JS, + /// so Rust code can borrow it. temporary_mapping: Vec, + /// Whether `temporary_mapping` has possibly been written to and needs to be written back to JS. + temporary_mapping_modified: bool, /// Unique identifier for this BufferMappedRange. 
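Returning to the expanded `Texture::create_view` documentation above, a minimal sketch (assuming an existing mipmapped `texture`):

```rust
// A view restricted to the first mip level, e.g. for use as a render pass attachment.
let first_mip = texture.create_view(&wgpu::TextureViewDescriptor {
    label: Some("first mip"),
    base_mip_level: 0,
    mip_level_count: Some(1),
    ..Default::default()
});
```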
ident: crate::cmp::Identifier, } @@ -2554,8 +2558,8 @@ impl dispatch::QueueInterface for WebQueue { fn copy_external_image_to_texture( &self, - source: &wgt::CopyExternalImageSourceInfo, - dest: wgt::CopyExternalImageDestInfo<&crate::api::Texture>, + source: &crate::CopyExternalImageSourceInfo, + dest: crate::CopyExternalImageDestInfo<&crate::api::Texture>, size: crate::Extent3d, ) { self.inner @@ -2672,6 +2676,7 @@ impl dispatch::BufferInterface for WebBuffer { WebBufferMappedRange { actual_mapping, temporary_mapping, + temporary_mapping_modified: false, ident: crate::cmp::Identifier::create(), } .into() @@ -3103,6 +3108,18 @@ impl dispatch::CommandEncoderInterface for WebCommandEncoder { ) { unimplemented!("Raytracing not implemented for web"); } + + fn transition_resources<'a>( + &mut self, + _buffer_transitions: &mut dyn Iterator< + Item = wgt::BufferTransition<&'a dispatch::DispatchBuffer>, + >, + _texture_transitions: &mut dyn Iterator< + Item = wgt::TextureTransition<&'a dispatch::DispatchTexture>, + >, + ) { + // no-op + } } impl Drop for WebCommandEncoder { fn drop(&mut self) { @@ -3768,11 +3785,18 @@ impl dispatch::BufferMappedRangeInterface for WebBufferMappedRange { #[inline] fn slice_mut(&mut self) -> &mut [u8] { + self.temporary_mapping_modified = true; &mut self.temporary_mapping } } impl Drop for WebBufferMappedRange { fn drop(&mut self) { + if !self.temporary_mapping_modified { + // For efficiency, skip the copy if it is not needed. + // This is also how we skip copying back on *read-only* mappings. + return; + } + // Copy from the temporary mapping back into the array buffer that was // originally provided by the browser let temporary_mapping_slice = self.temporary_mapping.as_slice(); diff --git a/wgpu/src/backend/wgpu_core.rs b/wgpu/src/backend/wgpu_core.rs index ad750860bc..b2e242b6b5 100644 --- a/wgpu/src/backend/wgpu_core.rs +++ b/wgpu/src/backend/wgpu_core.rs @@ -385,7 +385,7 @@ fn map_texture_copy_view( expect(unused) )] fn map_texture_tagged_copy_view( - view: wgt::CopyExternalImageDestInfo<&api::Texture>, + view: crate::CopyExternalImageDestInfo<&api::Texture>, ) -> wgc::command::CopyExternalImageDestInfo { wgc::command::CopyExternalImageDestInfo { texture: view.texture.inner.as_core().id, @@ -854,14 +854,17 @@ impl dispatch::InstanceInterface for ContextWgpuCore { #[cfg(feature = "wgsl")] fn wgsl_language_features(&self) -> crate::WgslLanguageFeatures { - wgc::naga::front::wgsl::ImplementedLanguageExtension::all() - .iter() - .copied() - .fold( - crate::WgslLanguageFeatures::empty(), - #[expect(unreachable_code)] - |acc, wle| acc | match wle {}, - ) + use wgc::naga::front::wgsl::ImplementedLanguageExtension; + ImplementedLanguageExtension::all().iter().copied().fold( + crate::WgslLanguageFeatures::empty(), + |acc, wle| { + acc | match wle { + ImplementedLanguageExtension::PointerCompositeAccess => { + crate::WgslLanguageFeatures::PointerCompositeAccess + } + } + }, + ) } } @@ -1771,8 +1774,8 @@ impl dispatch::QueueInterface for CoreQueue { #[cfg(any(webgpu, webgl))] fn copy_external_image_to_texture( &self, - source: &wgt::CopyExternalImageSourceInfo, - dest: wgt::CopyExternalImageDestInfo<&crate::api::Texture>, + source: &crate::CopyExternalImageSourceInfo, + dest: crate::CopyExternalImageDestInfo<&crate::api::Texture>, size: crate::Extent3d, ) { match self.context.0.queue_copy_external_image_to_texture( @@ -2207,7 +2210,7 @@ impl dispatch::CommandEncoderInterface for CoreCommandEncoder { self.id, &wgc::command::ComputePassDescriptor { label: 
desc.label.map(Borrowed), - timestamp_writes: timestamp_writes.as_ref(), + timestamp_writes, }, ); @@ -2519,7 +2522,7 @@ impl dispatch::CommandEncoderInterface for CoreCommandEncoder { .map(|instance| wgc::ray_tracing::TlasInstance { blas_id: instance.blas.as_core().id, transform: &instance.transform, - custom_index: instance.custom_index, + custom_data: instance.custom_data, mask: instance.mask, }) }); @@ -2542,6 +2545,37 @@ impl dispatch::CommandEncoderInterface for CoreCommandEncoder { ); } } + + fn transition_resources<'a>( + &mut self, + buffer_transitions: &mut dyn Iterator< + Item = wgt::BufferTransition<&'a dispatch::DispatchBuffer>, + >, + texture_transitions: &mut dyn Iterator< + Item = wgt::TextureTransition<&'a dispatch::DispatchTexture>, + >, + ) { + let result = self.context.0.command_encoder_transition_resources( + self.id, + buffer_transitions.map(|t| wgt::BufferTransition { + buffer: t.buffer.as_core().id, + state: t.state, + }), + texture_transitions.map(|t| wgt::TextureTransition { + texture: t.texture.as_core().id, + selector: t.selector.clone(), + state: t.state, + }), + ); + + if let Err(cause) = result { + self.context.handle_error_nolabel( + &self.error_sink, + cause, + "CommandEncoder::transition_resources", + ); + } + } } impl Drop for CoreCommandEncoder { diff --git a/wgpu/src/cmp.rs b/wgpu/src/cmp.rs index 376a9e0239..2ba35fac9a 100644 --- a/wgpu/src/cmp.rs +++ b/wgpu/src/cmp.rs @@ -65,6 +65,7 @@ macro_rules! impl_eq_ord_hash_proxy { /// ```ignore /// impl_eq_ord_hash_arc_address!(MyType => .field); /// ``` +#[cfg_attr(not(wgpu_core), expect(unused_macros))] macro_rules! impl_eq_ord_hash_arc_address { ($type:ty => $($access:tt)*) => { impl PartialEq for $type { @@ -102,4 +103,5 @@ macro_rules! impl_eq_ord_hash_arc_address { }; } +#[cfg_attr(not(wgpu_core), expect(unused_imports))] pub(crate) use {impl_eq_ord_hash_arc_address, impl_eq_ord_hash_proxy}; diff --git a/wgpu/src/dispatch.rs b/wgpu/src/dispatch.rs index a58decf65f..9ea9a33d1a 100644 --- a/wgpu/src/dispatch.rs +++ b/wgpu/src/dispatch.rs @@ -227,8 +227,8 @@ pub trait QueueInterface: CommonTraits { #[cfg(any(webgpu, webgl))] fn copy_external_image_to_texture( &self, - source: &wgt::CopyExternalImageSourceInfo, - dest: wgt::CopyExternalImageDestInfo<&crate::api::Texture>, + source: &crate::CopyExternalImageSourceInfo, + dest: crate::CopyExternalImageDestInfo<&crate::api::Texture>, size: crate::Extent3d, ); @@ -350,6 +350,12 @@ pub trait CommandEncoderInterface: CommonTraits { blas: &mut dyn Iterator>, tlas: &mut dyn Iterator, ); + + fn transition_resources<'a>( + &mut self, + buffer_transitions: &mut dyn Iterator>, + texture_transitions: &mut dyn Iterator>, + ); } pub trait ComputePassInterface: CommonTraits { fn set_pipeline(&mut self, pipeline: &DispatchComputePipeline); @@ -636,6 +642,8 @@ macro_rules! dispatch_types_inner { Self::Core(value) => value.as_ref(), #[cfg(webgpu)] Self::WebGPU(value) => value.as_ref(), + #[cfg(not(any(wgpu_core, webgpu)))] + _ => panic!("No context available. You need to enable one of wgpu's backend feature build flags."), } } } @@ -765,6 +773,8 @@ macro_rules! dispatch_types_inner { Self::Core(value) => value, #[cfg(webgpu)] Self::WebGPU(value) => value, + #[cfg(not(any(wgpu_core, webgpu)))] + _ => panic!("No context available. You need to enable one of wgpu's backend feature build flags."), } } } @@ -777,6 +787,8 @@ macro_rules! 
dispatch_types_inner { Self::Core(value) => value, #[cfg(webgpu)] Self::WebGPU(value) => value, + #[cfg(not(any(wgpu_core, webgpu)))] + _ => panic!("No context available. You need to enable one of wgpu's backend feature build flags."), } } } @@ -826,7 +838,7 @@ dispatch_types! { {mut type DispatchCommandEncoder = InterfaceTypes::CommandEncoder: CommandEncoderInterface}; {mut type DispatchComputePass = InterfaceTypes::ComputePass: ComputePassInterface}; {mut type DispatchRenderPass = InterfaceTypes::RenderPass: RenderPassInterface}; - {ref type DispatchCommandBuffer = InterfaceTypes::CommandBuffer: CommandBufferInterface}; + {mut type DispatchCommandBuffer = InterfaceTypes::CommandBuffer: CommandBufferInterface}; {mut type DispatchRenderBundleEncoder = InterfaceTypes::RenderBundleEncoder: RenderBundleEncoderInterface}; {ref type DispatchRenderBundle = InterfaceTypes::RenderBundle: RenderBundleInterface}; {ref type DispatchSurface = InterfaceTypes::Surface: SurfaceInterface}; diff --git a/wgpu/src/lib.rs b/wgpu/src/lib.rs index b83ae8c857..29cf6fe163 100644 --- a/wgpu/src/lib.rs +++ b/wgpu/src/lib.rs @@ -23,6 +23,13 @@ unsafe_op_in_unsafe_fn )] #![allow(clippy::arc_with_non_send_sync)] +#![cfg_attr(not(any(wgpu_core, webgpu)), allow(unused))] + +#[cfg(wgpu_core)] +pub extern crate wgpu_core as wgc; +#[cfg(wgpu_core)] +pub extern crate wgpu_hal as hal; +pub extern crate wgpu_types as wgt; // // @@ -53,11 +60,12 @@ pub use api::*; pub use wgt::{ AdapterInfo, AddressMode, AstcBlock, AstcChannel, Backend, BackendOptions, Backends, BindGroupLayoutEntry, BindingType, BlendComponent, BlendFactor, BlendOperation, BlendState, - BufferAddress, BufferBindingType, BufferSize, BufferUsages, Color, ColorTargetState, - ColorWrites, CommandBufferDescriptor, CompareFunction, CompositeAlphaMode, CoreCounters, - DepthBiasState, DepthStencilState, DeviceLostReason, DeviceType, DownlevelCapabilities, - DownlevelFlags, Dx12BackendOptions, Dx12Compiler, DynamicOffset, Extent3d, Face, Features, - FilterMode, FrontFace, GlBackendOptions, Gles3MinorVersion, HalCounters, ImageSubresourceRange, + BufferAddress, BufferBindingType, BufferSize, BufferTransition, BufferUsages, BufferUses, + Color, ColorTargetState, ColorWrites, CommandBufferDescriptor, CompareFunction, + CompositeAlphaMode, CopyExternalImageDestInfo, CoreCounters, DepthBiasState, DepthStencilState, + DeviceLostReason, DeviceType, DownlevelCapabilities, DownlevelFlags, DownlevelLimits, + Dx12BackendOptions, Dx12Compiler, DynamicOffset, Extent3d, Face, Features, FilterMode, + FrontFace, GlBackendOptions, Gles3MinorVersion, HalCounters, ImageSubresourceRange, IndexFormat, InstanceDescriptor, InstanceFlags, InternalCounters, Limits, MaintainResult, MemoryHints, MultisampleState, Origin2d, Origin3d, PipelineStatisticsTypes, PolygonMode, PowerPreference, PredefinedColorSpace, PresentMode, PresentationTimestamp, PrimitiveState, @@ -65,11 +73,11 @@ pub use wgt::{ SamplerBorderColor, ShaderLocation, ShaderModel, ShaderRuntimeChecks, ShaderStages, StencilFaceState, StencilOperation, StencilState, StorageTextureAccess, SurfaceCapabilities, SurfaceStatus, TexelCopyBufferLayout, TextureAspect, TextureDimension, TextureFormat, - TextureFormatFeatureFlags, TextureFormatFeatures, TextureSampleType, TextureUsages, - TextureViewDimension, VertexAttribute, VertexFormat, VertexStepMode, WasmNotSend, - WasmNotSendSync, WasmNotSync, COPY_BUFFER_ALIGNMENT, COPY_BYTES_PER_ROW_ALIGNMENT, - MAP_ALIGNMENT, PUSH_CONSTANT_ALIGNMENT, QUERY_RESOLVE_BUFFER_ALIGNMENT, 
QUERY_SET_MAX_QUERIES, - QUERY_SIZE, VERTEX_STRIDE_ALIGNMENT, + TextureFormatFeatureFlags, TextureFormatFeatures, TextureSampleType, TextureTransition, + TextureUsages, TextureUses, TextureViewDimension, VertexAttribute, VertexFormat, + VertexStepMode, WasmNotSend, WasmNotSendSync, WasmNotSync, COPY_BUFFER_ALIGNMENT, + COPY_BYTES_PER_ROW_ALIGNMENT, MAP_ALIGNMENT, PUSH_CONSTANT_ALIGNMENT, + QUERY_RESOLVE_BUFFER_ALIGNMENT, QUERY_SET_MAX_QUERIES, QUERY_SIZE, VERTEX_STRIDE_ALIGNMENT, }; #[expect(deprecated)] pub use wgt::{ImageCopyBuffer, ImageCopyTexture, ImageCopyTextureTagged, ImageDataLayout}; @@ -81,23 +89,6 @@ pub use wgt::ImageCopyExternalImage; #[cfg(any(webgpu, webgl))] pub use wgt::{CopyExternalImageSourceInfo, ExternalImageSource}; -// -// -// Re-exports of dependencies -// -// - -/// Re-export of our `wgpu-core` dependency. -/// -#[cfg(wgpu_core)] -pub use ::wgc as core; - -/// Re-export of our `wgpu-hal` dependency. -/// -/// -#[cfg(wgpu_core)] -pub use ::hal; - /// Re-export of our `naga` dependency. /// #[cfg(wgpu_core)] diff --git a/wgpu/src/util/device.rs b/wgpu/src/util/device.rs index 9e087cb7a2..54dc77b176 100644 --- a/wgpu/src/util/device.rs +++ b/wgpu/src/util/device.rs @@ -9,7 +9,7 @@ pub struct BufferInitDescriptor<'a> { pub contents: &'a [u8], /// Usages of a buffer. If the buffer is used in any way that isn't specified here, the operation /// will panic. - pub usage: crate::BufferUsages, + pub usage: wgt::BufferUsages, } /// Utility methods not meant to be in the main API. diff --git a/wgpu/src/util/mod.rs b/wgpu/src/util/mod.rs index 85809064d8..7d04000afd 100644 --- a/wgpu/src/util/mod.rs +++ b/wgpu/src/util/mod.rs @@ -155,16 +155,35 @@ impl std::ops::Deref for DownloadBuffer { /// /// # Examples /// -/// ``` no_run +/// ```no_run /// # use std::path::PathBuf; +/// use wgpu::PipelineCacheDescriptor; /// # let adapter_info = todo!(); -/// let cache_dir: PathBuf = PathBuf::new(); +/// # let device: wgpu::Device = todo!(); +/// let cache_dir: PathBuf = unimplemented!("Some reasonable platform-specific cache directory for your app."); /// let filename = wgpu::util::pipeline_cache_key(&adapter_info); -/// if let Some(filename) = filename { -/// let cache_file = cache_dir.join(&filename); -/// let cache_data = std::fs::read(&cache_file); -/// let pipeline_cache: wgpu::PipelineCache = todo!("Use data (if present) to create a pipeline cache"); +/// let (pipeline_cache, cache_file) = if let Some(filename) = filename { +/// let cache_path = cache_dir.join(&filename); +/// // If we failed to read the cache, for whatever reason, treat the data as lost. +/// // In a real app, we'd probably avoid caching entirely unless the error was "file not found". +/// let cache_data = std::fs::read(&cache_path).ok(); +/// let pipeline_cache = unsafe { +/// device.create_pipeline_cache(&PipelineCacheDescriptor { +/// data: cache_data.as_deref(), +/// label: None, +/// fallback: true +/// }) +/// }; +/// (Some(pipeline_cache), Some(cache_path)) +/// } else { +/// (None, None) +/// }; /// +/// // Run pipeline initialisation, making sure to set the `cache` +/// // fields of your `*PipelineDescriptor` to `pipeline_cache` +/// +/// // And then save the resulting cache (probably off the main thread). 
+/// if let (Some(pipeline_cache), Some(cache_file)) = (pipeline_cache, cache_file) { /// let data = pipeline_cache.get_data(); /// if let Some(data) = data { /// let temp_file = cache_file.with_extension("temp"); @@ -172,7 +191,7 @@ impl std::ops::Deref for DownloadBuffer { /// std::fs::rename(&temp_file, &cache_file)?; /// } /// } -/// # Ok::<(), std::io::Error>(()) +/// # Ok::<_, std::io::Error>(()) /// ``` /// /// [`PipelineCache`]: super::PipelineCache @@ -200,8 +219,7 @@ pub trait TextureFormatExt { /// use wgpu::util::TextureFormatExt; /// assert_eq!(wgpu::TextureFormat::from_storage_format(wgpu::naga::StorageFormat::Bgra8Unorm), wgpu::TextureFormat::Bgra8Unorm); /// ``` - #[cfg_attr(docsrs, doc(cfg(any(wgpu_core, naga))))] - #[cfg(any(wgpu_core, naga))] + #[cfg(wgpu_core)] fn from_storage_format(storage_format: crate::naga::StorageFormat) -> Self; /// Finds the [`StorageFormat`](wgc::naga::StorageFormat) corresponding to the given [`TextureFormat`](wgt::TextureFormat). @@ -214,20 +232,17 @@ pub trait TextureFormatExt { /// use wgpu::util::TextureFormatExt; /// assert_eq!(wgpu::TextureFormat::Bgra8Unorm.to_storage_format(), Some(wgpu::naga::StorageFormat::Bgra8Unorm)); /// ``` - #[cfg_attr(docsrs, doc(cfg(any(wgpu_core, naga))))] - #[cfg(any(wgpu_core, naga))] + #[cfg(wgpu_core)] fn to_storage_format(&self) -> Option; } impl TextureFormatExt for wgt::TextureFormat { - #[cfg_attr(docsrs, doc(cfg(any(wgpu_core, naga))))] - #[cfg(any(wgpu_core, naga))] + #[cfg(wgpu_core)] fn from_storage_format(storage_format: crate::naga::StorageFormat) -> Self { wgc::map_storage_format_from_naga(storage_format) } - #[cfg_attr(docsrs, doc(cfg(any(wgpu_core, naga))))] - #[cfg(any(wgpu_core, naga))] + #[cfg(wgpu_core)] fn to_storage_format(&self) -> Option { wgc::map_storage_format_to_naga(*self) } diff --git a/wgpu/src/util/texture_blitter.rs b/wgpu/src/util/texture_blitter.rs index e7ef11f925..2ee6c85d5e 100644 --- a/wgpu/src/util/texture_blitter.rs +++ b/wgpu/src/util/texture_blitter.rs @@ -156,6 +156,12 @@ pub struct TextureBlitter { impl TextureBlitter { /// Returns a [`TextureBlitter`] with default settings. + /// + /// # Arguments + /// - `device` - A [`Device`] + /// - `format` - The [`TextureFormat`] of the texture that will be copied to. This has to have the `RENDER_TARGET` usage. + /// + /// Properties of the blitting (such as the [`BlendState`]) can be customised by using [`TextureBlitterBuilder`] instead. 
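For reviewers of this hunk, a minimal usage sketch of the API documented above. This is an assumption-laden illustration, not part of the patch: it relies on the existing `wgpu::util::TextureBlitter::copy(device, encoder, source_view, target_view)` helper and on `TextureBlitter::new(device, format)` as shown in the hunk; the function and resource names are hypothetical, and the target texture is assumed to be created with `TextureUsages::RENDER_ATTACHMENT` (the render-target usage the doc comment refers to) in the format passed to `new`.

```rust
use wgpu::util::TextureBlitter;

// Hypothetical helper: blit `source_view` into `target_view` using the blitter's defaults.
// Assumes `target_view`'s texture was created with TextureUsages::RENDER_ATTACHMENT
// and with `target_format`.
fn blit_once(
    device: &wgpu::Device,
    queue: &wgpu::Queue,
    source_view: &wgpu::TextureView,
    target_view: &wgpu::TextureView,
    target_format: wgpu::TextureFormat,
) {
    // Default settings; use TextureBlitterBuilder to customise e.g. the BlendState.
    let blitter = TextureBlitter::new(device, target_format);

    let mut encoder =
        device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: Some("blit") });
    // Records a render pass that samples `source_view` and draws it into `target_view`.
    blitter.copy(device, &mut encoder, source_view, target_view);
    queue.submit([encoder.finish()]);
}
```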
pub fn new(device: &Device, format: TextureFormat) -> Self { TextureBlitterBuilder::new(device, format).build() } diff --git a/xtask/Cargo.toml b/xtask/Cargo.toml index 502a596557..3f1022efbe 100644 --- a/xtask/Cargo.toml +++ b/xtask/Cargo.toml @@ -2,6 +2,7 @@ name = "xtask" version = "0.1.0" edition = "2021" +rust-version = "1.83" publish = false [dependencies] diff --git a/xtask/src/check_feature_dependencies.rs b/xtask/src/check_feature_dependencies.rs new file mode 100644 index 0000000000..6601bc63b9 --- /dev/null +++ b/xtask/src/check_feature_dependencies.rs @@ -0,0 +1,114 @@ +use pico_args::Arguments; +use xshell::Shell; + +#[derive(Debug)] +enum Search<'a> { + #[expect(dead_code)] + Positive(&'a str), + Negative(&'a str), +} + +#[derive(Debug)] +struct Requirement<'a> { + human_readable_name: &'a str, + target: &'a str, + packages: &'a [&'a str], + features: &'a [&'a str], + default_features: bool, + search_terms: &'a [Search<'a>], +} + +const ALL_WGPU_FEATURES: &[&str] = &[ + "dx12", + "metal", + "webgpu", + "angle", + "vulkan-portability", + "webgl", + "spirv", + "glsl", + "wgsl", + "naga-ir", + "serde", + "replay", + "counters", + "fragile-send-sync-non-atomic-wasm", + "static-dxc", +]; + +pub fn check_feature_dependencies(shell: Shell, arguments: Arguments) -> anyhow::Result<()> { + let mut _args = arguments.finish(); + + let features_no_webgl: Vec<&str> = ALL_WGPU_FEATURES + .iter() + .copied() + .filter(|feature| *feature != "webgl") + .collect(); + + let requirements = [ + Requirement { + human_readable_name: "wasm32 without `webgl` feature does not depend on `wgpu-core`", + target: "wasm32-unknown-unknown", + packages: &["wgpu"], + features: &features_no_webgl, + default_features: false, + search_terms: &[Search::Negative("wgpu-core")], + }, + Requirement { + human_readable_name: + "wasm32 with `webgpu` and `wgsl` feature does not depend on `naga`", + target: "wasm32-unknown-unknown", + packages: &["wgpu"], + features: &["webgpu", "wgsl"], + default_features: false, + search_terms: &[Search::Negative("naga")], + }, + ]; + + let mut any_failures = false; + for requirement in requirements { + let mut cmd = shell + .cmd("cargo") + .args(["tree", "--target", requirement.target]); + + for package in requirement.packages { + cmd = cmd.arg("--package").arg(package); + } + + if !requirement.default_features { + cmd = cmd.arg("--no-default-features"); + } + + if !requirement.features.is_empty() { + cmd = cmd.arg("--features").arg(requirement.features.join(",")); + } + + log::info!("Checking Requirement: {}", requirement.human_readable_name); + log::debug!("{:#?}", requirement); + log::debug!("$ {cmd}"); + + let output = cmd.read()?; + + log::debug!("{output}"); + + for search_term in requirement.search_terms { + let found = match search_term { + Search::Positive(search_term) => output.contains(search_term), + Search::Negative(search_term) => !output.contains(search_term), + }; + + if found { + log::info!("✅ Passed!"); + } else { + log::info!("❌ Failed"); + any_failures = true; + } + } + } + + if any_failures { + anyhow::bail!("Some feature dependencies are not met"); + } + + Ok(()) +} diff --git a/xtask/src/main.rs b/xtask/src/main.rs index f173fe9690..8cfc12f70d 100644 --- a/xtask/src/main.rs +++ b/xtask/src/main.rs @@ -3,6 +3,7 @@ use std::process::ExitCode; use anyhow::Context; use pico_args::Arguments; +mod check_feature_dependencies; mod run_wasm; mod test; mod util; @@ -12,6 +13,9 @@ const HELP: &str = "\ Usage: xtask Commands: + check-feature-dependencies + Check certain 
dependency invariants are upheld. + run-wasm Build and run web examples @@ -71,6 +75,9 @@ fn main() -> anyhow::Result { shell.change_dir(String::from(env!("CARGO_MANIFEST_DIR")) + "/.."); match subcommand.as_deref() { + Some("check-feature-dependencies") => { + check_feature_dependencies::check_feature_dependencies(shell, args)? + } Some("run-wasm") => run_wasm::run_wasm(shell, args)?, Some("test") => test::run_tests(shell, args)?, Some("vendor-web-sys") => vendor_web_sys::run_vendor_web_sys(shell, args)?, diff --git a/xtask/src/run_wasm.rs b/xtask/src/run_wasm.rs index a9e8e3c9d0..d8048b3ca8 100644 --- a/xtask/src/run_wasm.rs +++ b/xtask/src/run_wasm.rs @@ -6,28 +6,22 @@ use xshell::Shell; use crate::util::{check_all_programs, Program}; pub(crate) fn run_wasm(shell: Shell, mut args: Arguments) -> anyhow::Result<()> { - let no_serve = args.contains("--no-serve"); + let should_serve = !args.contains("--no-serve"); let release = args.contains("--release"); - let programs_needed: &[_] = if no_serve { - &[Program { - crate_name: "wasm-bindgen-cli", - binary_name: "wasm-bindgen", - }] - } else { - &[ - Program { - crate_name: "wasm-bindgen-cli", - binary_name: "wasm-bindgen", - }, - Program { - crate_name: "simple-http-server", - binary_name: "simple-http-server", - }, - ] - }; - - check_all_programs(programs_needed)?; + let mut programs_needed = vec![Program { + crate_name: "wasm-bindgen-cli", + binary_name: "wasm-bindgen", + }]; + + if should_serve { + programs_needed.push(Program { + crate_name: "simple-http-server", + binary_name: "simple-http-server", + }); + } + + check_all_programs(&programs_needed)?; let release_flag: &[_] = if release { &["--release"] } else { &[] }; let output_dir = if release { "release" } else { "debug" }; @@ -77,7 +71,7 @@ pub(crate) fn run_wasm(shell: Shell, mut args: Arguments) -> anyhow::Result<()> .context("Failed to run wasm-bindgen")?; let static_files = shell - .read_dir("examples/static") + .read_dir("examples/features/web-static") .context("Failed to enumerate static files")?; for file in static_files { @@ -91,7 +85,7 @@ pub(crate) fn run_wasm(shell: Shell, mut args: Arguments) -> anyhow::Result<()> .with_context(|| format!("Failed to copy static file \"{}\"", file.display()))?; } - if !no_serve { + if should_serve { log::info!("serving on port 8000"); // Explicitly specify the IP address to 127.0.0.1 since otherwise simple-http-server will