diff --git a/panda/src/display/displayRegion.cxx b/panda/src/display/displayRegion.cxx index 84c852d472d..df0e7a7292b 100644 --- a/panda/src/display/displayRegion.cxx +++ b/panda/src/display/displayRegion.cxx @@ -487,7 +487,9 @@ get_screenshot() { if (gsg->get_threading_model().get_draw_stage() != current_thread->get_pipeline_stage()) { // Ask the engine to do on the draw thread. GraphicsEngine *engine = window->get_engine(); - return engine->do_get_screenshot(this, gsg); + return engine->run_on_draw_thread([this] { + return get_screenshot(); + }); } // We are on the draw thread. diff --git a/panda/src/display/graphicsEngine.I b/panda/src/display/graphicsEngine.I index e9fc42ce44d..ca9142a8a81 100644 --- a/panda/src/display/graphicsEngine.I +++ b/panda/src/display/graphicsEngine.I @@ -171,3 +171,53 @@ INLINE void GraphicsEngine:: dispatch_compute(const LVecBase3i &work_groups, const ShaderAttrib *sattr, GraphicsStateGuardian *gsg) { dispatch_compute(work_groups, RenderState::make(sattr), gsg); } + +#ifndef CPPPARSER +/** + * Waits for the draw thread to become idle, then runs the given function on it. + */ +template +INLINE auto GraphicsEngine:: +run_on_draw_thread(Callable &&callable) -> decltype(callable()) { + ReMutexHolder holder(_lock); + std::string draw_name = _threading_model.get_draw_name(); + if (draw_name.empty()) { + return std::move(callable)(); + } else { + WindowRenderer *wr = get_window_renderer(draw_name, 0); + RenderThread *thread = (RenderThread *)wr; + return thread->run_on_thread(std::move(callable)); + } +} + +/** + * Waits for this thread to become idle, then runs the given function on it. + */ +template +INLINE auto GraphicsEngine::RenderThread:: +run_on_thread(Callable &&callable) -> + typename std::enable_if::value, decltype(callable())>::type { + + using ReturnType = decltype(callable()); + alignas(ReturnType) unsigned char storage[sizeof(ReturnType)]; + + run_on_thread([] (RenderThread *data) { + new (data->_return_data) ReturnType(std::move(*(Callable *)data->_callback_data)()); + }, &callable, storage); + + return *(ReturnType *)storage; +} + +/** + * Waits for this thread to become idle, then runs the given function on it. + */ +template +INLINE auto GraphicsEngine::RenderThread:: +run_on_thread(Callable &&callable) -> + typename std::enable_if::value, decltype(callable())>::type { + + run_on_thread([] (RenderThread *data) { + std::move(*(Callable *)data->_callback_data)(); + }, &callable, nullptr); +} +#endif // CPPPARSER diff --git a/panda/src/display/graphicsEngine.cxx b/panda/src/display/graphicsEngine.cxx index 5fab08e1551..474f8e29347 100644 --- a/panda/src/display/graphicsEngine.cxx +++ b/panda/src/display/graphicsEngine.cxx @@ -1134,47 +1134,9 @@ flip_frame() { */ bool GraphicsEngine:: extract_texture_data(Texture *tex, GraphicsStateGuardian *gsg) { - ReMutexHolder holder(_lock); - - string draw_name = gsg->get_threading_model().get_draw_name(); - if (draw_name.empty()) { - // A single-threaded environment. No problem. + return run_on_draw_thread([=] () { return gsg->extract_texture_data(tex); - - } else { - // A multi-threaded environment. We have to wait until the draw thread - // has finished its current task. - WindowRenderer *wr = get_window_renderer(draw_name, 0); - RenderThread *thread = (RenderThread *)wr; - MutexHolder cv_holder(thread->_cv_mutex); - - while (thread->_thread_state != TS_wait) { - thread->_cv_done.wait(); - } - - // Temporarily set this so that it accesses data from the current thread. 
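// Caller-side sketch of the pattern introduced by run_on_draw_thread
// (illustrative only, not part of the patch; `engine` and `region` are
// assumed to come from the surrounding code, and the caller needs the same
// access to GraphicsEngine that DisplayRegion has above).  The callable runs
// inline in a single-threaded model; otherwise the draw thread is first made
// idle and the return value is handed back to the calling thread:
PT(Texture) grabbed = engine->run_on_draw_thread([region] () {
  return region->get_screenshot();
});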
- int pipeline_stage = Thread::get_current_pipeline_stage(); - int draw_pipeline_stage = thread->get_pipeline_stage(); - thread->set_pipeline_stage(pipeline_stage); - - // Now that the draw thread is idle, signal it to do the extraction task. - thread->_gsg = gsg; - thread->_texture = tex; - thread->_thread_state = TS_do_extract_texture_data; - thread->_cv_mutex.release(); - thread->_cv_start.notify(); - thread->_cv_mutex.acquire(); - - // Wait for it to finish the extraction. - while (thread->_thread_state != TS_wait) { - thread->_cv_done.wait(); - } - - thread->set_pipeline_stage(draw_pipeline_stage); - thread->_gsg = nullptr; - thread->_texture = nullptr; - return thread->_result; - } + }); } /** @@ -1189,56 +1151,13 @@ extract_texture_data(Texture *tex, GraphicsStateGuardian *gsg) { */ vector_uchar GraphicsEngine:: extract_shader_buffer_data(ShaderBuffer *buffer, GraphicsStateGuardian *gsg) { - ReMutexHolder holder(_lock); - - string draw_name = gsg->get_threading_model().get_draw_name(); - if (draw_name.empty()) { - // A single-threaded environment. No problem. + return run_on_draw_thread([=] () { vector_uchar data; if (!gsg->extract_shader_buffer_data(buffer, data)) { data.clear(); } return data; - } - - // A multi-threaded environment. We have to wait until the draw thread - // has finished its current task. - WindowRenderer *wr = get_window_renderer(draw_name, 0); - RenderThread *thread = (RenderThread *)wr; - MutexHolder cv_holder(thread->_cv_mutex); - - while (thread->_thread_state != TS_wait) { - thread->_cv_done.wait(); - } - - // Temporarily set this so that it accesses data from the current thread. - int pipeline_stage = Thread::get_current_pipeline_stage(); - int draw_pipeline_stage = thread->get_pipeline_stage(); - thread->set_pipeline_stage(pipeline_stage); - - // Now that the draw thread is idle, signal it to do the extraction task. - vector_uchar data; - thread->_gsg = gsg; - thread->_buffer = buffer; - thread->_buffer_result = &data; - thread->_thread_state = TS_do_extract_shader_buffer_data; - thread->_cv_mutex.release(); - thread->_cv_start.notify(); - thread->_cv_mutex.acquire(); - - // Wait for it to finish the extraction. - while (thread->_thread_state != TS_wait) { - thread->_cv_done.wait(); - } - - thread->set_pipeline_stage(draw_pipeline_stage); - thread->_gsg = nullptr; - thread->_buffer = nullptr; - thread->_buffer_result = nullptr; - if (!thread->_result) { - data.clear(); - } - return data; + }); } /** @@ -1263,50 +1182,12 @@ dispatch_compute(const LVecBase3i &work_groups, const RenderState *state, Graphi nassertv(shader != nullptr); nassertv(gsg != nullptr); - ReMutexHolder holder(_lock); - - string draw_name = gsg->get_threading_model().get_draw_name(); - if (draw_name.empty()) { - // A single-threaded environment. No problem. + run_on_draw_thread([=] () { gsg->push_group_marker(std::string("Compute ") + shader->get_filename(Shader::ST_compute).get_basename()); gsg->set_state_and_transform(state, TransformState::make_identity()); gsg->dispatch_compute(work_groups[0], work_groups[1], work_groups[2]); gsg->pop_group_marker(); - - } else { - // A multi-threaded environment. We have to wait until the draw thread - // has finished its current task. - WindowRenderer *wr = get_window_renderer(draw_name, 0); - RenderThread *thread = (RenderThread *)wr; - MutexHolder cv_holder(thread->_cv_mutex); - - while (thread->_thread_state != TS_wait) { - thread->_cv_done.wait(); - } - - // Temporarily set this so that it accesses data from the current thread. 
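// For reference, a caller-side sketch of the compute dispatch that is being
// rerouted through run_on_draw_thread here (the ShaderAttrib `sattr` and the
// GSG are assumed to be set up elsewhere; they are not part of this patch):
engine->dispatch_compute(LVecBase3i(64, 64, 1), sattr, gsg);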
- int pipeline_stage = Thread::get_current_pipeline_stage(); - int draw_pipeline_stage = thread->get_pipeline_stage(); - thread->set_pipeline_stage(pipeline_stage); - - // Now that the draw thread is idle, signal it to do the compute task. - thread->_gsg = gsg; - thread->_state = state; - thread->_work_groups = work_groups; - thread->_thread_state = TS_do_compute; - thread->_cv_mutex.release(); - thread->_cv_start.notify(); - thread->_cv_mutex.acquire(); - - // Wait for it to finish the compute task. - while (thread->_thread_state != TS_wait) { - thread->_cv_done.wait(); - } - - thread->set_pipeline_stage(draw_pipeline_stage); - thread->_gsg = nullptr; - thread->_state = nullptr; - } + }); } /** @@ -1342,43 +1223,6 @@ texture_uploaded(Texture *tex) { // Usually only called by DisplayRegion::do_cull. } -/** - * Called by DisplayRegion::do_get_screenshot - */ -PT(Texture) GraphicsEngine:: -do_get_screenshot(DisplayRegion *region, GraphicsStateGuardian *gsg) { - // A multi-threaded environment. We have to wait until the draw thread - // has finished its current task. - - ReMutexHolder holder(_lock); - - const std::string &draw_name = gsg->get_threading_model().get_draw_name(); - WindowRenderer *wr = get_window_renderer(draw_name, 0); - RenderThread *thread = (RenderThread *)wr; - MutexHolder cv_holder(thread->_cv_mutex); - - while (thread->_thread_state != TS_wait) { - thread->_cv_done.wait(); - } - - // Now that the draw thread is idle, signal it to do the extraction task. - thread->_region = region; - thread->_thread_state = TS_do_screenshot; - thread->_cv_mutex.release(); - thread->_cv_start.notify(); - thread->_cv_mutex.acquire(); - - // Wait for it to finish the extraction. - while (thread->_thread_state != TS_wait) { - thread->_cv_done.wait(); - } - - PT(Texture) tex = std::move(thread->_texture); - thread->_region = nullptr; - thread->_texture = nullptr; - return tex; -} - /** * Fires off a cull traversal using the indicated camera. */ @@ -2867,31 +2711,9 @@ thread_main() { do_pending(_engine, current_thread); break; - case TS_do_compute: - nassertd(_gsg != nullptr && _state != nullptr) break; - { - const ShaderAttrib *sattr; - _state->get_attrib(sattr); - _gsg->push_group_marker(std::string("Compute ") + sattr->get_shader()->get_filename(Shader::ST_compute).get_basename()); - _gsg->set_state_and_transform(_state, TransformState::make_identity()); - _gsg->dispatch_compute(_work_groups[0], _work_groups[1], _work_groups[2]); - _gsg->pop_group_marker(); - } - break; - - case TS_do_extract_texture_data: - nassertd(_gsg != nullptr && _texture != nullptr) break; - _result = _gsg->extract_texture_data(_texture); - break; - - case TS_do_extract_shader_buffer_data: - nassertd(_gsg != nullptr && _texture != nullptr) break; - _result = _gsg->extract_shader_buffer_data(_buffer, *_buffer_result); - break; - - case TS_do_screenshot: - nassertd(_region != nullptr) break; - _texture = _region->get_screenshot(); + case TS_callback: + nassertd(_callback != nullptr) break; + _callback(this); break; case TS_terminate: @@ -2916,3 +2738,39 @@ thread_main() { } } } + +/** + * Waits for this thread to become idle, then runs the given function on it. + */ +void GraphicsEngine::RenderThread:: +run_on_thread(Callback *callback, void *callback_data, void *return_data) { + MutexHolder cv_holder(_cv_mutex); + + while (_thread_state != TS_wait) { + _cv_done.wait(); + } + + // Temporarily set this so that it accesses data from the current thread. 
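// The templated run_on_thread overloads in graphicsEngine.I funnel into this
// type-erased entry point: the callable travels through _callback_data, a
// captureless trampoline restores its type on the render thread, and a
// non-void result is placement-constructed into the caller-supplied
// _return_data storage.  A sketch of a call from within GraphicsEngine code
// (illustrative only; `thread` is an idle RenderThread):
int frame = thread->run_on_thread([] {
  return ClockObject::get_global_clock()->get_frame_count();
});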
+ int pipeline_stage = Thread::get_current_pipeline_stage(); + int thread_pipeline_stage = get_pipeline_stage(); + set_pipeline_stage(pipeline_stage); + + // Now that the draw thread is idle, signal it to run the callback. + _callback = callback; + _callback_data = callback_data; + _return_data = return_data; + _thread_state = TS_callback; + _cv_mutex.release(); + _cv_start.notify(); + _cv_mutex.acquire(); + + // Wait for it to finish the job. + while (_thread_state != TS_wait) { + _cv_done.wait(); + } + + set_pipeline_stage(thread_pipeline_stage); + _callback = nullptr; + _callback_data = nullptr; + _return_data = nullptr; +} diff --git a/panda/src/display/graphicsEngine.h b/panda/src/display/graphicsEngine.h index 16034a123a5..88874f8e509 100644 --- a/panda/src/display/graphicsEngine.h +++ b/panda/src/display/graphicsEngine.h @@ -35,6 +35,8 @@ #include "renderState.h" #include "clockObject.h" +#include + class Pipeline; class DisplayRegion; class GraphicsPipe; @@ -130,16 +132,17 @@ class EXPCL_PANDA_DISPLAY GraphicsEngine : public ReferenceCount { TS_do_flip, TS_do_release, TS_do_windows, - TS_do_compute, - TS_do_extract_texture_data, - TS_do_extract_shader_buffer_data, - TS_do_screenshot, + TS_callback, TS_terminate, TS_done }; void texture_uploaded(Texture *tex); - PT(Texture) do_get_screenshot(DisplayRegion *region, GraphicsStateGuardian *gsg); + +#ifndef CPPPARSER + template + INLINE auto run_on_draw_thread(Callable &&callable) -> decltype(callable()); +#endif public: static void do_cull(CullHandler *cull_handler, SceneSetup *scene_setup, @@ -306,21 +309,31 @@ class EXPCL_PANDA_DISPLAY GraphicsEngine : public ReferenceCount { RenderThread(const std::string &name, GraphicsEngine *engine); virtual void thread_main(); + typedef void Callback(RenderThread *thread); + void run_on_thread(Callback *callback, + void *callback_data = nullptr, + void *return_data = nullptr); + +#ifndef CPPPARSER + template + INLINE auto run_on_thread(Callable &&callable) -> + typename std::enable_if::value, decltype(callable())>::type; + + template + INLINE auto run_on_thread(Callable &&callable) -> + typename std::enable_if::value, decltype(callable())>::type; +#endif + GraphicsEngine *_engine; Mutex _cv_mutex; ConditionVar _cv_start; ConditionVar _cv_done; ThreadState _thread_state; - // These are stored for extract_texture_data and dispatch_compute. - GraphicsStateGuardian *_gsg; - PT(Texture) _texture; - ShaderBuffer *_buffer; - vector_uchar *_buffer_result; - const RenderState *_state; - DisplayRegion *_region; - LVecBase3i _work_groups; - bool _result; + // Used for TS_callback. + Callback *_callback; + void *_callback_data; + void *_return_data; }; WindowRenderer *get_window_renderer(const std::string &name, int pipeline_stage); diff --git a/panda/src/display/graphicsStateGuardian.cxx b/panda/src/display/graphicsStateGuardian.cxx index 9be45d34eff..80c02f88bb7 100644 --- a/panda/src/display/graphicsStateGuardian.cxx +++ b/panda/src/display/graphicsStateGuardian.cxx @@ -572,6 +572,23 @@ update_texture(TextureContext *, bool) { return true; } +/** + * Ensures that the current Texture data is refreshed onto the GSG. This + * means updating the texture properties and/or re-uploading the texture + * image, if necessary. This should only be called within the draw thread. + * + * If force is true, this function will not return until the texture has been + * fully uploaded. 
If force is false, the function may choose to upload a + * simple version of the texture instead, if the texture is not fully resident + * (and if get_incomplete_render() is true). + */ +bool GraphicsStateGuardian:: +update_texture(TextureContext *tc, bool force, CompletionToken token) { + bool result = update_texture(tc, force); + token.complete(result); + return result; +} + /** * Frees the resources previously allocated via a call to prepare_texture(), * including deleting the TextureContext itself, if it is non-NULL. diff --git a/panda/src/display/graphicsStateGuardian.h b/panda/src/display/graphicsStateGuardian.h index 3ed65b8e31c..039572a7afd 100644 --- a/panda/src/display/graphicsStateGuardian.h +++ b/panda/src/display/graphicsStateGuardian.h @@ -292,6 +292,7 @@ class EXPCL_PANDA_DISPLAY GraphicsStateGuardian : public GraphicsStateGuardianBa public: virtual TextureContext *prepare_texture(Texture *tex); virtual bool update_texture(TextureContext *tc, bool force); + virtual bool update_texture(TextureContext *tc, bool force, CompletionToken token); virtual void release_texture(TextureContext *tc); virtual void release_textures(const pvector &contexts); virtual bool extract_texture_data(Texture *tex); diff --git a/panda/src/event/asyncFuture.cxx b/panda/src/event/asyncFuture.cxx index d8b8b91d979..a6aa341c07b 100644 --- a/panda/src/event/asyncFuture.cxx +++ b/panda/src/event/asyncFuture.cxx @@ -389,6 +389,17 @@ wake_task(AsyncTask *task) { } } +/** + * Internal callback called when a CompletionToken created from this future + * completes. + */ +void AsyncFuture:: +token_callback(Completable::Data *data, bool success) { + AsyncFuture *future = (AsyncFuture *)data; + future->set_result(EventParameter(success)); + unref_delete(future); +} + /** * @see AsyncFuture::gather */ diff --git a/panda/src/event/asyncFuture.h b/panda/src/event/asyncFuture.h index acb6b1020fb..f9f71472f8d 100644 --- a/panda/src/event/asyncFuture.h +++ b/panda/src/event/asyncFuture.h @@ -20,6 +20,7 @@ #include "eventParameter.h" #include "patomic.h" #include "small_vector.h" +#include "completionToken.h" class AsyncTaskManager; class AsyncTask; @@ -58,7 +59,7 @@ class AsyncTask; * * @since 1.10.0 */ -class EXPCL_PANDA_EVENT AsyncFuture : public TypedReferenceCount { +class EXPCL_PANDA_EVENT AsyncFuture : public TypedReferenceCount, protected Completable::Data { PUBLISHED: INLINE AsyncFuture(); virtual ~AsyncFuture(); @@ -109,6 +110,8 @@ class EXPCL_PANDA_EVENT AsyncFuture : public TypedReferenceCount { private: void wake_task(AsyncTask *task); + static void token_callback(Completable::Data *, bool success); + protected: enum FutureState : patomic_unsigned_lock_free::value_type { // Pending states @@ -136,6 +139,7 @@ class EXPCL_PANDA_EVENT AsyncFuture : public TypedReferenceCount { friend class AsyncGatheringFuture; friend class AsyncTaskChain; + friend class CompletionToken; friend class PythonTask; public: @@ -199,6 +203,33 @@ class EXPCL_PANDA_EVENT AsyncGatheringFuture final : public AsyncFuture { static TypeHandle _type_handle; }; +#ifndef CPPPARSER +// Allow passing a future into a method accepting a CompletionToken. 
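// Caller-side sketch of what these converting constructors make possible
// (illustrative only; `gsg` and `tc` are assumed from the surrounding code,
// and the token-taking update_texture overload is the one added elsewhere in
// this patch):
//
//   PT(AsyncFuture) fut = new AsyncFuture;
//   gsg->update_texture(tc, false, fut);
//   // fut becomes done with a bool result once the upload completes,
//   // via AsyncFuture::token_callback above.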
+template<> +INLINE CompletionToken:: +CompletionToken(AsyncFuture *future) { + if (future != nullptr) { + future->ref(); + _callback._data = future; + if (_callback._data->_function == nullptr) { + _callback._data->_function = &AsyncFuture::token_callback; + } + } +} + +template<> +INLINE CompletionToken:: +CompletionToken(PT(AsyncFuture) future) { + if (future != nullptr) { + _callback._data = future; + if (_callback._data->_function == nullptr) { + _callback._data->_function = &AsyncFuture::token_callback; + } + future.cheat() = nullptr; + } +} +#endif + #include "asyncFuture.I" #endif // !ASYNCFUTURE_H diff --git a/panda/src/gles2gsg/gles2gsg.h b/panda/src/gles2gsg/gles2gsg.h index c43beded29d..feb30b41a05 100644 --- a/panda/src/gles2gsg/gles2gsg.h +++ b/panda/src/gles2gsg/gles2gsg.h @@ -148,6 +148,10 @@ typedef char GLchar; #define GL_TRANSFORM_FEEDBACK_BARRIER_BIT 0x800 #define GL_ATOMIC_COUNTER_BARRIER_BIT 0x1000 #define GL_SHADER_STORAGE_BARRIER_BIT 0x2000 +#define GL_MAP_INVALIDATE_RANGE_BIT 0x0004 +#define GL_MAP_INVALIDATE_BUFFER_BIT 0x0008 +#define GL_MAP_FLUSH_EXPLICIT_BIT 0x0010 +#define GL_MAP_UNSYNCHRONIZED_BIT 0x0020 #define GL_HALF_FLOAT 0x140B #define GL_COLOR 0x1800 #define GL_DEPTH 0x1801 diff --git a/panda/src/glstuff/glGraphicsBuffer_src.cxx b/panda/src/glstuff/glGraphicsBuffer_src.cxx index dde857eb17f..a97818448d5 100644 --- a/panda/src/glstuff/glGraphicsBuffer_src.cxx +++ b/panda/src/glstuff/glGraphicsBuffer_src.cxx @@ -281,7 +281,7 @@ begin_frame(FrameMode mode, Thread *current_thread) { CLP(GraphicsStateGuardian) *glgsg = (CLP(GraphicsStateGuardian) *)_gsg.p(); for (CLP(TextureContext) *gtc : _texture_contexts) { - if (gtc->needs_barrier(GL_FRAMEBUFFER_BARRIER_BIT)) { + if (gtc->needs_barrier(GL_FRAMEBUFFER_BARRIER_BIT, true)) { glgsg->issue_memory_barrier(GL_FRAMEBUFFER_BARRIER_BIT); // If we've done it for one, we've done it for all. break; @@ -1973,7 +1973,7 @@ resolve_multisamples() { // Issue memory barriers as necessary to make sure that the texture memory // is synchronized before we blit to it. for (CLP(TextureContext) *gtc : _texture_contexts) { - if (gtc->needs_barrier(GL_FRAMEBUFFER_BARRIER_BIT)) { + if (gtc->needs_barrier(GL_FRAMEBUFFER_BARRIER_BIT, true)) { glgsg->issue_memory_barrier(GL_FRAMEBUFFER_BARRIER_BIT); // If we've done it for one, we've done it for all. 
break; diff --git a/panda/src/glstuff/glGraphicsStateGuardian_src.cxx b/panda/src/glstuff/glGraphicsStateGuardian_src.cxx index 9988da71d07..b29387463b9 100644 --- a/panda/src/glstuff/glGraphicsStateGuardian_src.cxx +++ b/panda/src/glstuff/glGraphicsStateGuardian_src.cxx @@ -68,6 +68,7 @@ #include "shaderGenerator.h" #include "samplerState.h" #include "displayInformation.h" +#include "completionCounter.h" #if defined(HAVE_CG) && !defined(OPENGLES) #include @@ -97,6 +98,10 @@ PStatCollector CLP(GraphicsStateGuardian)::_check_residency_pcollector("*:PStats PStatCollector CLP(GraphicsStateGuardian)::_wait_fence_pcollector("Wait:Fence"); PStatCollector CLP(GraphicsStateGuardian)::_copy_texture_finish_pcollector("Draw:Copy texture:Finish"); +static PStatCollector _create_texture_storage_pcollector("Draw:Transfer data:Texture:Create Storage"); +static PStatCollector _create_map_pbo_pcollector("Draw:Transfer data:Texture:Create/Map PBO"); +static PStatCollector _load_texture_copy_pcollector("Draw:Transfer data:Texture:Copy/Convert"); + #if defined(HAVE_CG) && !defined(OPENGLES) AtomicAdjust::Integer CLP(GraphicsStateGuardian)::_num_gsgs_with_cg_contexts = 0; small_vector CLP(GraphicsStateGuardian)::_destroyed_cg_contexts; @@ -326,6 +331,23 @@ uchar_l_to_rgb(unsigned char *dest, const unsigned char *source, } } +/** + * Recopies the given array of pixels, converting from luminance to RGBA + * arrangement. + */ +static void +uchar_l_to_rgba(unsigned char *dest, const unsigned char *source, + int num_pixels) { + for (int i = 0; i < num_pixels; i++) { + dest[0] = source[0]; + dest[1] = source[0]; + dest[2] = source[0]; + dest[3] = 1; + dest += 4; + source += 1; + } +} + /** * Recopies the given array of pixels, converting from BGRA to RGBA * arrangement. @@ -426,78 +448,176 @@ ushort_la_to_rgba(unsigned short *dest, const unsigned short *source, } /** - * Reverses the order of the components within the image, to convert (for - * instance) GL_BGR to GL_RGB. Returns the byte pointer representing the - * converted image, or the original image if it is unchanged. - * - * new_image must be supplied; it is the PTA_uchar that will be used to hold - * the converted image if required. It will be modified only if the - * conversion is necessary, in which case the data will be stored there, and - * this pointer will be returned. If the conversion is not necessary, this - * pointer will be left unchanged. + * Determines the number of components of the given external format. 
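 *
 * For example (illustrative only):
 *
 *   get_external_format_components(GL_LUMINANCE_ALPHA)  // 2
 *   get_external_format_components(GL_BGRA)             // 4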
*/ -static const unsigned char * -fix_component_ordering(PTA_uchar &new_image, - const unsigned char *orig_image, size_t orig_image_size, - GLenum external_format, Texture *tex) { - const unsigned char *result = orig_image; +static int +get_external_format_components(GLint external_format) { + switch (external_format) { +#ifndef OPENGLES_1 + case GL_RED: + case GL_RED_INTEGER: + case GL_GREEN: + case GL_BLUE: +#endif + case GL_ALPHA: + case GL_LUMINANCE: +#ifndef OPENGLES + case GL_GREEN_INTEGER: + case GL_BLUE_INTEGER: + case GL_ALPHA_INTEGER: +#endif + return 1; + +#ifndef OPENGLES_1 + case GL_RG: + case GL_RG_INTEGER: +#endif + case GL_LUMINANCE_ALPHA: + return 2; + + case GL_RGB: +#ifndef OPENGLES_1 + case GL_RGB_INTEGER: +#endif +#ifndef OPENGLES + case GL_BGR: + case GL_BGR_INTEGER: +#endif + return 3; + case GL_RGBA: +#ifndef OPENGLES_1 + case GL_RGBA_INTEGER: +#endif + case GL_BGRA: +#ifndef OPENGLES + case GL_BGRA_INTEGER: +#endif + return 4; + + default: + GLCAT.error() + << "Unknown external format 0x" << std::hex << external_format + << std::dec << "\n"; + return 4; + } +} + +/** + * Copies the image with optional conversion. + */ +static void +copy_image(unsigned char *new_image, const unsigned char *orig_image, + size_t orig_image_size, GLint external_format, int num_components, + int component_width) { switch (external_format) { +#ifndef OPENGLES_1 + case GL_RED: + case GL_RED_INTEGER: + case GL_GREEN: + case GL_BLUE: +#endif + case GL_ALPHA: + case GL_LUMINANCE: +#ifndef OPENGLES + case GL_GREEN_INTEGER: + case GL_BLUE_INTEGER: + case GL_ALPHA_INTEGER: +#endif + if (num_components == 1) { + memcpy(new_image, orig_image, orig_image_size); + return; + } + break; + +#ifndef OPENGLES_1 + case GL_RG: + case GL_RG_INTEGER: +#endif + case GL_LUMINANCE_ALPHA: + if (num_components == 2) { + memcpy(new_image, orig_image, orig_image_size); + return; + } + break; + +#ifndef OPENGLES_1 +#ifndef OPENGLES + case GL_BGR: +#endif + case GL_RGB_INTEGER: + if (num_components == 3) { + memcpy(new_image, orig_image, orig_image_size); + return; + } + if (num_components == 1 && component_width == 1) { + uchar_l_to_rgb(new_image, orig_image, orig_image_size); + return; + } + break; +#endif + case GL_RGB: - if (tex->get_num_components() == 1) { - new_image = PTA_uchar::empty_array(orig_image_size * 3); +#ifndef OPENGLES + case GL_BGR_INTEGER: +#endif + // Need to swap order. 
+ if (num_components == 1 && component_width == 1) { uchar_l_to_rgb(new_image, orig_image, orig_image_size); - result = new_image; - break; + return; } - switch (tex->get_component_type()) { - case Texture::T_unsigned_byte: - case Texture::T_byte: - new_image = PTA_uchar::empty_array(orig_image_size); + if (num_components == 3 && component_width == 1) { uchar_bgr_to_rgb(new_image, orig_image, orig_image_size / 3); - result = new_image; - break; - - case Texture::T_unsigned_short: - case Texture::T_short: - new_image = PTA_uchar::empty_array(orig_image_size); - ushort_bgr_to_rgb((unsigned short *)new_image.p(), + return; + } + if (num_components == 3 && component_width == 2) { + ushort_bgr_to_rgb((unsigned short *)new_image, (const unsigned short *)orig_image, orig_image_size / 6); - result = new_image; - break; + return; + } + break; - default: - break; + case GL_BGRA: +#ifndef OPENGLES_1 + case GL_RGBA_INTEGER: +#endif + if (num_components == 4) { + memcpy(new_image, orig_image, orig_image_size); + return; + } + if (num_components == 1 && component_width == 1) { + uchar_l_to_rgba(new_image, orig_image, orig_image_size); + return; + } + if (num_components == 2 && component_width == 1) { + uchar_la_to_rgba(new_image, orig_image, orig_image_size / 2); + return; } break; case GL_RGBA: - if (tex->get_num_components() == 2) { - new_image = PTA_uchar::empty_array(orig_image_size * 2); +#ifndef OPENGLES + case GL_BGRA_INTEGER: +#endif + // Need to swap order. + if (num_components == 1 && component_width == 1) { + uchar_l_to_rgba(new_image, orig_image, orig_image_size); + return; + } + if (num_components == 2 && component_width == 1) { uchar_la_to_rgba(new_image, orig_image, orig_image_size / 2); - result = new_image; - break; + return; } - switch (tex->get_component_type()) { - case Texture::T_unsigned_byte: - case Texture::T_byte: - new_image = PTA_uchar::empty_array(orig_image_size); + if (num_components == 4 && component_width == 1) { uchar_bgra_to_rgba(new_image, orig_image, orig_image_size / 4); - result = new_image; - break; - - case Texture::T_unsigned_short: - case Texture::T_short: - new_image = PTA_uchar::empty_array(orig_image_size); - ushort_bgra_to_rgba((unsigned short *)new_image.p(), + return; + } + if (num_components == 4 && component_width == 2) { + ushort_bgra_to_rgba((unsigned short *)new_image, (const unsigned short *)orig_image, orig_image_size / 8); - result = new_image; - break; - - default: - break; + return; } break; @@ -505,7 +625,33 @@ fix_component_ordering(PTA_uchar &new_image, break; } - return result; + nassert_raise("Failed to convert image."); +} + +/** + * Reverses the order of the components within the image, to convert (for + * instance) GL_BGR to GL_RGB. Returns the byte pointer representing the + * converted image, or the original image if it is unchanged. + * + * new_image must be supplied; it is the PTA_uchar that will be used to hold + * the converted image if required. It will be modified only if the + * conversion is necessary, in which case the data will be stored there, and + * this pointer will be returned. If the conversion is not necessary, this + * pointer will be left unchanged. 
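 *
 * A small worked example (the names grey_ptr, grey_size and tex are
 * placeholders): a one-channel byte image destined for a GL_RGB upload comes
 * back three times as large, with the grey value replicated into R, G and B:
 *
 *   PTA_uchar converted;
 *   const unsigned char *ptr =
 *     fix_component_ordering(converted, grey_ptr, grey_size, GL_RGB, tex);
 *   // ptr == converted.p(), converted.size() == grey_size * 3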
+ */ +static const unsigned char * +fix_component_ordering(PTA_uchar &new_image, + const unsigned char *orig_image, size_t orig_image_size, + GLenum external_format, Texture *tex) { + if (external_format == GL_RGB || external_format == GL_RGBA) { + int num_components = tex->get_num_components(); + int component_width = tex->get_component_width(); + size_t new_image_size = (orig_image_size / num_components) * ((external_format == GL_RGBA) ? 4 : 3); + new_image = PTA_uchar::empty_array(new_image_size); + copy_image(&new_image[0], orig_image, orig_image_size, external_format, num_components, component_width); + return new_image; + } + return orig_image; } // #--- Zhao Nov2011 @@ -524,6 +670,7 @@ int CLP(GraphicsStateGuardian)::get_driver_shader_version_minor() { return _gl_s CLP(GraphicsStateGuardian):: CLP(GraphicsStateGuardian)(GraphicsEngine *engine, GraphicsPipe *pipe) : GraphicsStateGuardian(gl_coordinate_system, engine, pipe), + _job_queue_cvar(_job_queue_mutex), _renderbuffer_residency(get_prepared_objects()->get_name(), "renderbuffer"), _active_ppbuffer_memory_pcollector("Graphics memory:" + get_prepared_objects()->get_name() + ":Active:ppbuffer"), _inactive_ppbuffer_memory_pcollector("Graphics memory:" + get_prepared_objects()->get_name() + ":Inactive:ppbuffer") @@ -567,6 +714,13 @@ CLP(GraphicsStateGuardian)(GraphicsEngine *engine, GraphicsPipe *pipe) : _cg_context = 0; #endif +#ifdef HAVE_THREADS + AsyncTaskManager *task_mgr = AsyncTaskManager::get_global_ptr(); + _async_chain = task_mgr->make_task_chain("gl_texture_transfer", + gl_texture_transfer_num_threads, + gl_texture_transfer_thread_priority); +#endif + #ifdef DO_PSTATS if (gl_finish) { GLCAT.warning() @@ -585,6 +739,15 @@ CLP(GraphicsStateGuardian):: << "GLGraphicsStateGuardian " << this << " destructing\n"; } +#ifdef HAVE_THREADS + // Make sure there are no more async tasks that could reference this GSG. 
+ _async_chain->wait_for_tasks(); +#endif + { + MutexHolder holder(_job_queue_mutex); + _job_queue.clear(); + } + close_gsg(); } @@ -1731,6 +1894,24 @@ reset() { } #endif +#ifndef OPENGLES_1 + _glCopyBufferSubData = nullptr; + if (_supports_buffers) { + if (is_at_least_gl_version(3, 1) || + is_at_least_gles_version(3, 0) || + has_extension("GL_ARB_copy_buffer")) { + _glCopyBufferSubData = (PFNGLCOPYBUFFERSUBDATAPROC) + get_extension_func("glCopyBufferSubData"); + } +#ifdef OPENGLES_2 + else if (has_extension("GL_NV_copy_buffer")) { + _glCopyBufferSubData = (PFNGLCOPYBUFFERSUBDATAPROC) + get_extension_func("glCopyBufferSubDataNV"); + } +#endif + } +#endif + #ifdef OPENGLES if (is_at_least_gles_version(3, 0)) { _glMapBufferRange = (PFNGLMAPBUFFERRANGEEXTPROC) @@ -1757,7 +1938,8 @@ reset() { _glMapBufferRange = nullptr; } - if (is_at_least_gl_version(4, 4) || has_extension("GL_ARB_buffer_storage")) { + if (_glMapBufferRange != nullptr && + (is_at_least_gl_version(4, 4) || has_extension("GL_ARB_buffer_storage"))) { _glBufferStorage = (PFNGLBUFFERSTORAGEPROC) get_extension_func("glBufferStorage"); @@ -2593,7 +2775,10 @@ reset() { #endif #ifndef OPENGLES - if (is_at_least_gl_version(4, 5) || has_extension("GL_ARB_direct_state_access")) { + _glMapNamedBufferRange = nullptr; + + if (gl_support_dsa && + (is_at_least_gl_version(4, 5) || has_extension("GL_ARB_direct_state_access"))) { _glCreateTextures = (PFNGLCREATETEXTURESPROC) get_extension_func("glCreateTextures"); _glTextureStorage2D = (PFNGLTEXTURESTORAGE2DPROC) @@ -2607,12 +2792,29 @@ reset() { _glBindTextureUnit = (PFNGLBINDTEXTUREUNITPROC) get_extension_func("glBindTextureUnit"); + if (_glMapBufferRange != nullptr) { + _glMapNamedBufferRange = (PFNGLMAPNAMEDBUFFERRANGEPROC) + get_extension_func("glMapNamedBufferRange"); + } + _supports_dsa = true; } else { _supports_dsa = false; } #endif +#ifndef OPENGLES_1 +#ifdef OPENGLES + if (is_at_least_gles_version(3, 0) || has_extension("GL_NV_pixel_buffer_object")) { +#else + if (is_at_least_gl_version(2, 1) || has_extension("GL_ARB_pixel_buffer_object")) { +#endif + _supports_pixel_buffers = true; + } else { + _supports_pixel_buffers = false; + } +#endif + #ifndef OPENGLES_1 // Do we support empty framebuffer objects? #ifdef OPENGLES @@ -3207,7 +3409,7 @@ reset() { _max_image_units = 0; #ifndef OPENGLES_1 #ifdef OPENGLES - if (is_at_least_gles_version(3, 1) && gl_immutable_texture_storage) { + if (is_at_least_gles_version(3, 1)) { #else if (is_at_least_gl_version(4, 2) || has_extension("GL_ARB_shader_image_load_store")) { #endif @@ -4279,6 +4481,8 @@ prepare_lens() { */ bool CLP(GraphicsStateGuardian):: begin_frame(Thread *current_thread) { + process_pending_jobs(false); + if (!GraphicsStateGuardian::begin_frame(current_thread)) { return false; } @@ -4292,9 +4496,11 @@ begin_frame(Thread *current_thread) { _primitive_batches_display_list_pcollector.clear_level(); #endif - if (!_async_ram_copies.empty()) { - finish_async_framebuffer_ram_copies(); +#ifndef OPENGLES_1 + if (!_fences.empty()) { + process_fences(false); } +#endif #if defined(DO_PSTATS) && !defined(OPENGLES) int frame_number = ClockObject::get_global_clock()->get_frame_count(current_thread); @@ -6252,25 +6458,24 @@ issue_memory_barrier(GLbitfield barriers) { _glMemoryBarrier(barriers); - // Indicate that barriers no longer need to be issued for the relevant lists - // of textures. + // Increment these counters to indicate that these barriers have been issued. 
if (barriers & GL_TEXTURE_FETCH_BARRIER_BIT) { - _textures_needing_fetch_barrier.clear(); + ++_texture_fetch_barrier_counter; GLCAT.spam(false) << " texture_fetch"; } if (barriers & GL_SHADER_IMAGE_ACCESS_BARRIER_BIT) { - _textures_needing_image_access_barrier.clear(); + ++_shader_image_access_barrier_counter; GLCAT.spam(false) << " shader_image_access"; } if (barriers & GL_TEXTURE_UPDATE_BARRIER_BIT) { - _textures_needing_update_barrier.clear(); + ++_texture_update_barrier_counter; GLCAT.spam(false) << " texture_update"; } if (barriers & GL_FRAMEBUFFER_BARRIER_BIT) { - _textures_needing_framebuffer_barrier.clear(); + ++_framebuffer_barrier_counter; GLCAT.spam(false) << " framebuffer"; } @@ -6370,23 +6575,16 @@ prepare_texture(Texture *tex) { * (and if get_incomplete_render() is true). */ bool CLP(GraphicsStateGuardian):: -update_texture(TextureContext *tc, bool force) { +update_texture(TextureContext *tc, bool force, CompletionToken token) { CLP(TextureContext) *gtc; DCAST_INTO_R(gtc, tc, false); - Texture *tex = tc->get_texture(); - GLenum target = get_texture_target(tex->get_texture_type()); - if (gtc->_target != target) { - // The target has changed. That means we have to re-bind a new texture - // object. - gtc->reset_data(target, tex->get_num_views()); - } - if (gtc->was_image_modified() || !gtc->_has_storage) { PStatGPUTimer timer(this, _texture_update_pcollector); // If the texture image was modified, reload the texture. - bool okflag = upload_texture(gtc, force, tex->uses_mipmaps()); + Texture *tex = tc->get_texture(); + bool okflag = upload_texture(gtc, force, tex->uses_mipmaps(), std::move(token)); if (!okflag) { GLCAT.error() << "Could not load " << *tex << "\n"; @@ -6402,6 +6600,7 @@ update_texture(TextureContext *tc, bool force) { } else if (gtc->was_properties_modified()) { PStatGPUTimer timer(this, _texture_update_pcollector); + Texture *tex = tc->get_texture(); // If only the properties have been modified, we don't necessarily need to // reload the texture. @@ -6417,7 +6616,7 @@ update_texture(TextureContext *tc, bool force) { if (needs_reload) { gtc->mark_needs_reload(); - bool okflag = upload_texture(gtc, force, tex->uses_mipmaps()); + bool okflag = upload_texture(gtc, force, tex->uses_mipmaps(), std::move(token)); if (!okflag) { GLCAT.error() << "Could not load " << *tex << "\n"; @@ -6427,7 +6626,20 @@ update_texture(TextureContext *tc, bool force) { else { // The texture didn't need reloading, but mark it fully updated now. gtc->mark_loaded(); + + if (force) { + // This update is still underway. + gtc->wait_pending_uploads(); + } + token.complete(true); + } + } + else { + if (force) { + // This update is still underway. 
+ gtc->wait_pending_uploads(); } + token.complete(true); } gtc->enqueue_lru(&_prepared_objects->_graphics_memory_lru); @@ -6445,12 +6657,9 @@ void CLP(GraphicsStateGuardian):: release_texture(TextureContext *tc) { CLP(TextureContext) *gtc = DCAST(CLP(TextureContext), tc); -#ifndef OPENGLES_1 - _textures_needing_fetch_barrier.erase(gtc); - _textures_needing_image_access_barrier.erase(gtc); - _textures_needing_update_barrier.erase(gtc); - _textures_needing_framebuffer_barrier.erase(gtc); -#endif + gtc->cancel_pending_uploads(); + gtc->wait_pending_uploads(); + gtc->delete_unused_pbos(); gtc->set_num_views(0); delete gtc; @@ -6473,13 +6682,6 @@ release_textures(const pvector &contexts) { for (TextureContext *tc : contexts) { CLP(TextureContext) *gtc = DCAST(CLP(TextureContext), tc); -#ifndef OPENGLES_1 - _textures_needing_fetch_barrier.erase(gtc); - _textures_needing_image_access_barrier.erase(gtc); - _textures_needing_update_barrier.erase(gtc); - _textures_needing_framebuffer_barrier.erase(gtc); -#endif - num_indices += gtc->_num_views; if (gtc->_buffers != nullptr) { num_buffers += gtc->_num_views; @@ -7516,7 +7718,6 @@ release_shader_buffers(const pvector &contexts) { */ bool CLP(GraphicsStateGuardian):: extract_shader_buffer_data(ShaderBuffer *buffer, vector_uchar &data) { - GLuint index = 0; BufferContext *bc = buffer->prepare_now(get_prepared_objects(), this); if (bc == nullptr || !bc->is_of_type(CLP(BufferContext)::get_class_type())) { return false; @@ -7525,6 +7726,10 @@ extract_shader_buffer_data(ShaderBuffer *buffer, vector_uchar &data) { data.resize(buffer->get_data_size_bytes()); + if (_glMemoryBarrier != nullptr) { + _glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); + } + _glBindBuffer(GL_SHADER_STORAGE_BUFFER, gbc->_index); _glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, data.size(), &data[0]); @@ -7805,6 +8010,8 @@ framebuffer_copy_to_texture(Texture *tex, int view, int z, nassertr(tc != nullptr, false); CLP(TextureContext) *gtc = DCAST(CLP(TextureContext), tc); + gtc->cancel_pending_uploads(); + GLenum target = get_texture_target(tex->get_texture_type()); if (gtc->_target != target) { gtc->reset_data(target, view + 1); @@ -7884,8 +8091,8 @@ framebuffer_copy_to_texture(Texture *tex, int view, int z, } #ifndef OPENGLES_1 - if (gtc->needs_barrier(GL_TEXTURE_UPDATE_BARRIER_BIT)) { - // Make sure that any incoherent writes to this texture have been synced. + if (gtc->needs_barrier(GL_TEXTURE_UPDATE_BARRIER_BIT, true)) { + // Make sure that any reads and writes to this texture have been synced. issue_memory_barrier(GL_TEXTURE_UPDATE_BARRIER_BIT); } #endif @@ -8275,9 +8482,46 @@ framebuffer_copy_to_ram(Texture *tex, int view, int z, _glMemoryBarrier(GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT); } #endif - GLsync fence = _glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); - _async_ram_copies.push_back({request, pbo, fence, external_format, - view, mapped_ptr, image_size}); + + insert_fence([ + this, request = PT(ScreenshotRequest)(request), + mapped_ptr, size = image_size, + pbo, view, external_format + ] (bool success) { + + void *ptr = mapped_ptr; + if (ptr == nullptr) { + ptr = map_read_buffer(GL_PIXEL_PACK_BUFFER, pbo, size); + } + + // Do the memcpy in the background, since it can be slow. + auto func = [=](AsyncTask *task) { + const unsigned char *result = (unsigned char *)ptr; + PTA_uchar new_image; + if (external_format == GL_RGBA || external_format == GL_RGB) { + // We may have to reverse the byte ordering of the image if GL didn't do + // it for us. 
+ result = fix_component_ordering(new_image, result, size, + external_format, request->get_result()); + } + request->set_view_data(view, result); + + // Finishing can take a long time, release the client buffer first so it + // can be reused for the next screenshot. + this->release_client_buffer(pbo, ptr, size); + request->finish(); + return AsyncTask::DS_done; + }; +#ifdef HAVE_THREADS + // We assign a sort value based on the originating frame number, so that + // earlier frames will be processed before subsequent frames, but we don't + // make it unique for every frame, which would kill concurrency. + int frame_number = request->get_frame_number(); + _async_chain->add(std::move(func), "screenshot", frame_number >> 3, -(frame_number & ((1 << 3) - 1))); +#else + func(nullptr); +#endif + }); } else #endif if (external_format == GL_RGBA || external_format == GL_RGB) { @@ -8302,104 +8546,6 @@ framebuffer_copy_to_ram(Texture *tex, int view, int z, return true; } -/** - * Finishes all asynchronous framebuffer-copy-to-ram operations. - */ -void CLP(GraphicsStateGuardian):: -finish_async_framebuffer_ram_copies(bool force) { -#ifndef OPENGLES_1 - if (_async_ram_copies.empty()) { - return; - } - - //XXX having a fixed number of threads is not a great idea. We ought to have - // a common thread pool that is sized based on the available number of CPUs. -#ifdef HAVE_THREADS - AsyncTaskManager *task_mgr = AsyncTaskManager::get_global_ptr(); - static AsyncTaskChain *chain = task_mgr->make_task_chain("texture_download", 2, TP_low); -#endif - - PStatTimer timer(_copy_texture_finish_pcollector); - - if (force) { - // Just wait for the last fence, the rest must be complete too then. - PStatTimer timer(_wait_fence_pcollector); - GLsync fence = _async_ram_copies.back()._fence; - _glClientWaitSync(fence, 0, (GLuint64)-1); - } - - while (!_async_ram_copies.empty()) { - AsyncRamCopy © = _async_ram_copies.front(); - if (!force) { - GLenum result = _glClientWaitSync(copy._fence, 0, 0); - if (result != GL_ALREADY_SIGNALED && result != GL_CONDITION_SATISFIED) { - // Not yet done. The rest must not yet be done then, either. - break; - } - } - _glDeleteSync(copy._fence); - - GLuint pbo = copy._pbo; - int view = copy._view; - PT(ScreenshotRequest) request = std::move(copy._request); - GLuint external_format = copy._external_format; - void *mapped_ptr = copy._mapped_pointer; - size_t size = copy._size; - - if (mapped_ptr == nullptr) { - _glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo); -#ifdef OPENGLES - // There is neither glMapBuffer nor persistent mapping in OpenGL ES - mapped_ptr = _glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, size, GL_MAP_READ_BIT); -#else - // If we get here in desktop GL, we must not have persistent mapping - nassertv(!_supports_buffer_storage); - mapped_ptr = _glMapBuffer(GL_PIXEL_PACK_BUFFER, GL_READ_ONLY); -#endif - } - - // Do the memcpy in the background, since it can be slow. - auto func = [=](AsyncTask *task) { - const unsigned char *result = (unsigned char *)mapped_ptr; - PTA_uchar new_image; - if (external_format == GL_RGBA || external_format == GL_RGB) { - // We may have to reverse the byte ordering of the image if GL didn't do - // it for us. - result = fix_component_ordering(new_image, result, size, - external_format, request->get_result()); - } - request->set_view_data(view, result); - - // Finishing can take a long time, release the client buffer first so it - // can be reused for the next screenshot. 
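// Worked example of the sort/priority scheme used for these screenshot
// tasks: frame 21 gets chain sort 21 >> 3 == 2 and priority -(21 & 7) == -5,
// so frames 16..23 share one sort bucket (and may run concurrently) while
// earlier frames within the bucket get the higher priority.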
- this->release_client_buffer(pbo, mapped_ptr, size); - request->finish(); - return AsyncTask::DS_done; - }; -#ifdef HAVE_THREADS - // We assign a sort value based on the originating frame number, so that - // earlier frames will be processed before subsequent frames, but we don't - // make it unique for every frame, which would kill concurrency. - int frame_number = request->get_frame_number(); - chain->add(std::move(func), "screenshot", frame_number >> 3, -(frame_number & ((1 << 3) - 1))); -#else - func(nullptr); -#endif - - _async_ram_copies.pop_front(); - - // If there is 1 remaining, save it for next frame. This helps prevent an - // inconsistent frame rate when the number of fetched frames alternates - // between 0 and 2, which can settle into a stable feedback loop. - if (!force && _async_ram_copies.size() == 1) { - break; - } - } - - _glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); -#endif -} - #ifdef SUPPORT_FIXED_FUNCTION /** * @@ -13870,12 +14016,15 @@ apply_sampler(GLuint unit, const SamplerState &sampler, CLP(TextureContext) *gtc * image. */ bool CLP(GraphicsStateGuardian):: -upload_texture(CLP(TextureContext) *gtc, bool force, bool uses_mipmaps) { +upload_texture(CLP(TextureContext) *gtc, bool force, bool uses_mipmaps, + CompletionToken token) { PStatGPUTimer timer(this, _load_texture_pcollector); Texture *tex = gtc->get_texture(); - if (_effective_incomplete_render && !force) { + //FIXME: upload simple texture for async uploaded thing + bool async_upload = true; + if (_effective_incomplete_render && !force && !async_upload) { bool has_image = _supports_compressed_texture ? tex->has_ram_image() : tex->has_uncompressed_ram_image(); if (!has_image && tex->might_have_ram_image() && tex->has_simple_ram_image() && @@ -14159,6 +14308,8 @@ upload_texture(CLP(TextureContext) *gtc, bool force, bool uses_mipmaps) { int num_views = tex->get_num_views(); if (needs_reload) { + gtc->cancel_pending_uploads(); + if (gtc->_immutable) { GLCAT.info() << "Attempt to modify texture with immutable storage, recreating texture.\n"; @@ -14172,8 +14323,8 @@ upload_texture(CLP(TextureContext) *gtc, bool force, bool uses_mipmaps) { #ifndef OPENGLES_1 if (needs_reload || !image.is_null()) { - // Make sure that any incoherent writes to this texture have been synced. - if (gtc->needs_barrier(GL_TEXTURE_UPDATE_BARRIER_BIT)) { + // Make sure that any reads and writes to this texture have been synced. + if (gtc->needs_barrier(GL_TEXTURE_UPDATE_BARRIER_BIT, true)) { issue_memory_barrier(GL_TEXTURE_UPDATE_BARRIER_BIT); } } @@ -14190,46 +14341,73 @@ upload_texture(CLP(TextureContext) *gtc, bool force, bool uses_mipmaps) { nassertr(gtc->_buffers != nullptr, false); } - bool extract_success = false; - if (tex->get_post_load_store_cache()) { - extract_success = true; + bool compressed = image_compression != Texture::CM_off; + if (compressed && !_supports_compressed_texture) { + return false; } - bool success = true; - for (int view = 0; view < num_views; ++view) { - if (upload_texture_image(gtc, view, needs_reload || view >= old_num_views, - mipmap_bias, num_levels, - internal_format, external_format, - component_type, image_compression)) { - gtc->_has_storage = true; + // Does this texture require asynchronous uploads? +#if defined(HAVE_THREADS) && !defined(OPENGLES_1) + int async_buffers = _supports_pixel_buffers ? tex->get_num_async_transfer_buffers() : 0; + if (async_buffers != 0) { + // Prefer immutable storage, if supported. 
+ if (needs_reload && _supports_tex_storage) { + gtc->_immutable = true; + } + } + else if (async_buffers == 0 && gtc->_num_pbos > 0) { + gtc->delete_unused_pbos(); + } +#else + int async_buffers = 0; +#endif + + // Keep track of which views are uploaded. + CompletionCounter counter; + bool success = true; + + for (int view = 0; view < num_views; ++view) { + if (upload_texture_view(gtc, view, needs_reload || view >= old_num_views, + mipmap_bias, num_levels, + internal_format, external_format, + component_type, compressed, async_buffers, + counter.make_token())) { + // We always create storage right away even if we do the upload of the + // actual data asynchronously. + gtc->_has_storage = true; gtc->_internal_format = internal_format; gtc->_width = width; gtc->_height = height; gtc->_depth = depth; gtc->_num_levels = num_levels; - - if (extract_success) { - // The next call assumes the texture is still bound. - if (!do_extract_texture_data(gtc, view)) { - extract_success = false; - } - } } else { success = false; } } - report_my_gl_errors(); + if (!success) { + report_my_gl_errors(); + return false; + } + + gtc->_uploads_pending++; + + std::move(counter).then([=, token = std::move(token)] (bool success) mutable { + --gtc->_uploads_pending; + if (!success) { + token.complete(false); + return; + } - if (success) { if (needs_reload) { gtc->update_data_size_bytes(get_texture_memory_size(gtc)); } - nassertr(gtc->_has_storage, false); + nassertv(gtc->_has_storage); - if (extract_success) { + Texture *tex = gtc->get_texture(); + if (tex->get_post_load_store_cache()) { tex->set_post_load_store_cache(false); // OK, get the RAM image, and save it in a BamCache record. if (tex->has_ram_image()) { @@ -14243,14 +14421,20 @@ upload_texture(CLP(TextureContext) *gtc, bool force, bool uses_mipmaps) { } GraphicsEngine *engine = get_engine(); - nassertr(engine != nullptr, false); + nassertv(engine != nullptr); engine->texture_uploaded(tex); - gtc->mark_loaded(); - return true; - } + token.complete(true); + }); - return false; + // Update the modified counters now, even if we've only spawned an async + // upload, because we've already set things in motion to update the texture + // to this version. Otherwise, future calls to update_texture will continue + // to try to update the image over and over again. + gtc->mark_loaded(); + + report_my_gl_errors(); + return true; } /** @@ -14258,17 +14442,13 @@ upload_texture(CLP(TextureContext) *gtc, bool force, bool uses_mipmaps) { * texture memory. */ bool CLP(GraphicsStateGuardian):: -upload_texture_image(CLP(TextureContext) *gtc, int view, bool needs_reload, - int mipmap_bias, int num_levels, GLint internal_format, - GLint external_format, GLenum component_type, - Texture::CompressionMode image_compression) { +upload_texture_view(CLP(TextureContext) *gtc, int view, bool needs_reload, + int mipmap_bias, int num_levels, GLint internal_format, + GLint external_format, GLenum component_type, + bool compressed, int async_buffers, CompletionToken token) { // Make sure the error stack is cleared out before we begin. clear_my_gl_errors(); - if (image_compression != Texture::CM_off && !_supports_compressed_texture) { - return false; - } - GLenum target = gtc->_target; if (target == GL_NONE) { // Unsupported target (e.g. 3-d texturing on GL 1.1). 
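// The per-view uploads above are tied together with a CompletionCounter,
// using only what this patch itself relies on: make_token() hands out one
// token per pending job and the move-qualified then() fires once every token
// has completed.  A sketch (start_view_upload is a hypothetical async job):
//
//   CompletionCounter counter;
//   start_view_upload(counter.make_token());
//   start_view_upload(counter.make_token());
//   std::move(counter).then([] (bool success) {
//     // presumably false if any of the jobs reported failure
//   });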
@@ -14305,9 +14485,14 @@ upload_texture_image(CLP(TextureContext) *gtc, int view, bool needs_reload, int width = tex->get_expected_mipmap_x_size(mipmap_bias); int height = tex->get_expected_mipmap_y_size(mipmap_bias); int depth = tex->get_expected_mipmap_z_size(mipmap_bias); + int num_components = tex->get_num_components(); + int expected_num_components = compressed ? num_components : get_external_format_components(external_format); + int component_width = tex->get_component_width(); + GLenum usage = GL_STATIC_DRAW; #ifndef OPENGLES_1 if (target == GL_TEXTURE_BUFFER) { + usage = get_usage(tex->get_usage_hint()); GLuint buffer = gtc->get_view_buffer(view); nassertr(buffer != 0, false); _glBindBuffer(GL_TEXTURE_BUFFER, buffer); @@ -14375,6 +14560,7 @@ upload_texture_image(CLP(TextureContext) *gtc, int view, bool needs_reload, // but we are not allowed to change the texture size or number of mipmap // levels after this point. if (gtc->_immutable) { + PStatTimer timer(_create_texture_storage_pcollector); if (GLCAT.is_debug()) { GLCAT.debug() << "allocating storage for texture " << tex->get_name() << ", " @@ -14409,15 +14595,84 @@ upload_texture_image(CLP(TextureContext) *gtc, int view, bool needs_reload, // How many mipmap levels do we have available to upload? int num_ram_mipmap_levels = 0; + GLuint pbo = 0; + size_t pbo_size = 0; + void *mapped_ptr = nullptr; if (!image.is_null()) { num_ram_mipmap_levels = std::min(num_levels, tex->get_num_ram_mipmap_images() - mipmap_bias); + + // Create a PBO that can hold all the mipmap levels. +#if defined(HAVE_THREADS) && !defined(OPENGLES_1) + if (async_buffers != 0) { + pbo_size = 0; + for (int n = mipmap_bias; n < num_ram_mipmap_levels + mipmap_bias; ++n) { + size_t view_size = tex->get_ram_mipmap_view_size(n); + if (!compressed) { + view_size = expected_num_components * (view_size / num_components); + } + pbo_size += view_size; + } + + bool create_storage = false; + if (pbo_size != gtc->_pbo_size) { + // No PBOs yet, or they aren't of the right size. + if (_supports_buffer_storage) { + // If using buffer storage, need to deallocate all of them. + gtc->delete_unused_pbos(); + } + gtc->_pbo_size = pbo_size; + create_storage = true; + } + else if (async_buffers > 0) { + // Wait for a PBO to become available if we're at our limit. + gtc->wait_for_unused_pbo(async_buffers); + } + + PStatTimer timer(_create_map_pbo_pcollector); + if (gtc->_unused_pbos.empty()) { + _glGenBuffers(1, &pbo); + gtc->_num_pbos++; + create_storage = true; + } else { + // Map an existing PBO. + pbo = gtc->_unused_pbos.back(); + gtc->_unused_pbos.pop_back(); + } + + mapped_ptr = map_write_discard_buffer(GL_PIXEL_UNPACK_BUFFER, pbo, + pbo_size, create_storage); + if (mapped_ptr == nullptr) { + report_my_gl_errors(); + GLCAT.warning() + << "Failed to map pixel unpack buffer.\n"; + gtc->_unused_pbos.push_back(pbo); + } + } +#endif } - if (!needs_reload) { - // Try to subload the image over the existing GL Texture object, possibly - // saving on texture memory fragmentation. + if (needs_reload && num_ram_mipmap_levels == 0 && + external_format == GL_DEPTH_STENCIL && get_supports_depth_stencil()) { +#ifdef OPENGLES + component_type = GL_UNSIGNED_INT_24_8_OES; +#else + component_type = GL_UNSIGNED_INT_24_8_EXT; +#endif + } - if (GLCAT.is_debug()) { + int upload_count = ++gtc->_uploads_started; + + if (GLCAT.is_debug()) { + if (needs_reload) { + // Load the image up from scratch, creating a new GL Texture object. 
+ GLCAT.debug() + << "loading new texture object for " << tex->get_name() << " view " + << view << ", " << width << " x " << height << " x " << depth + << ", mipmaps " << num_ram_mipmap_levels << " / " << num_levels; + } + else { + // Try to subload the image over the existing GL Texture object, possibly + // saving on texture memory fragmentation. SparseArray pages = gtc->get_view_modified_pages(view, 0); if (num_ram_mipmap_levels == 0) { if (tex->has_clear_color()) { @@ -14425,457 +14680,622 @@ upload_texture_image(CLP(TextureContext) *gtc, int view, bool needs_reload, << "clearing texture " << tex->get_name() << " view " << view << ", " << width << " x " << height << " x " << depth << ", pages " << pages << ", mipmaps " << num_levels << ", clear_color = " - << tex->get_clear_color() << "\n"; + << tex->get_clear_color(); } else { GLCAT.debug() << "not loading NULL image for texture " << tex->get_name() << " view " << view << ", " << width << " x " << height << " x " << depth - << ", pages " << pages << ", mipmaps = " << num_levels << "\n"; + << ", pages " << pages << ", mipmaps = " << num_levels; } } else { GLCAT.debug() << "updating image data of texture " << tex->get_name() << " view " << view << ", " << width << " x " << height << " x " << depth << ", pages " << pages << ", mipmaps " << num_ram_mipmap_levels - << " / " << num_levels << "\n"; - } - } - - for (int n = mipmap_bias; n < num_levels + mipmap_bias; ++n) { - SparseArray pages = gtc->get_view_modified_pages(view, n); - - // we grab the mipmap pointer first, if it is NULL we grab the normal - // mipmap image pointer which is a PTA_uchar - const unsigned char *image_ptr = (unsigned char*)tex->get_ram_mipmap_pointer(n); - CPTA_uchar ptimage; - if (image_ptr == nullptr) { - ptimage = tex->get_ram_mipmap_image(n); - if (ptimage.is_null()) { - if (n - mipmap_bias < num_ram_mipmap_levels) { - // We were told we'd have this many RAM mipmap images, but we - // don't. Raise a warning. - GLCAT.warning() - << "No mipmap level " << n << " defined for " << tex->get_name() - << "\n"; - break; - } - - if (tex->has_clear_color()) { - // The texture has a clear color, so we should fill this mipmap - // level to a solid color. -#ifndef OPENGLES - if (target != GL_TEXTURE_BUFFER) { - if (_supports_clear_texture) { - // We can do that with the convenient glClearTexImage - // function. - vector_uchar clear_data = tex->get_clear_data(); + << " / " << num_levels; + } + } + if (mapped_ptr != nullptr) { + GLCAT.debug(false) << " (async #" << upload_count << " via buffer " << pbo << ")\n"; + } else { + GLCAT.debug(false) << " (#" << upload_count << ")\n"; + } + } - if (pages.has_all_of(0, depth)) { - _glClearTexImage(index, n - mipmap_bias, external_format, - component_type, (void *)&clear_data[0]); - } - else for (size_t sri = 0; sri < pages.get_num_subranges(); ++sri) { - int begin = pages.get_subrange_begin(sri); - int num_pages = pages.get_subrange_end(sri) - begin; - _glClearTexSubImage(index, n - mipmap_bias, 0, 0, begin, - width, height, num_pages, external_format, - component_type, (void *)&clear_data[0]); - } - continue; - } - } else { - if (_supports_clear_buffer) { - // For buffer textures we need to clear the underlying - // storage. - vector_uchar clear_data = tex->get_clear_data(); - - _glClearBufferData(GL_TEXTURE_BUFFER, internal_format, external_format, - component_type, (const void *)&clear_data[0]); - continue; - } - } -#endif // OPENGLES - // Ask the Texture class to create the mipmap level in RAM. 
It'll - // fill it in with the correct clear color, which we can then - // upload. - ptimage = tex->make_ram_mipmap_image(n); + // Keeps track of any async jobs we've spawned. +#if defined(HAVE_THREADS) && !defined(OPENGLES_1) + CompletionCounter counter; + struct AsyncLevel { + int width, height, depth; + size_t page_size; + uintptr_t pbo_offset; + SparseArray pages; + }; + AsyncLevel *async_levels = nullptr; + size_t num_async_levels = 0; + uintptr_t pbo_offset = 0u; +#endif + bool success = true; - } else { - // No clear color and no more images. - break; - } - } - image_ptr = ptimage; - } + for (int n = mipmap_bias; n < num_levels + mipmap_bias; ++n) { + SparseArray pages = gtc->get_view_modified_pages(view, n); + int level = n - mipmap_bias; + + int width = tex->get_expected_mipmap_x_size(n); + int height = tex->get_expected_mipmap_y_size(n); + + // we grab the mipmap pointer first, if it is NULL we grab the normal + // mipmap image pointer which is a PTA_uchar + const unsigned char *image_ptr = (unsigned char*)tex->get_ram_mipmap_pointer(n); + CPTA_uchar ptimage; + if (image_ptr == nullptr) { + ptimage = tex->get_ram_mipmap_image(n); + image_ptr = ptimage; + } + if (image_ptr == nullptr) { + if (level < num_ram_mipmap_levels) { + // We were told we'd have this many RAM mipmap images, but we + // don't. Raise a warning. + GLCAT.warning() + << "No mipmap level " << n << " defined for " << tex->get_name() + << "\n"; - PTA_uchar bgr_image; - size_t page_size = tex->get_ram_mipmap_page_size(n); - if (image_ptr != nullptr) { - const unsigned char *orig_image_ptr = image_ptr; - size_t view_size = tex->get_ram_mipmap_view_size(n); - image_ptr += view_size * view; - nassertr(image_ptr >= orig_image_ptr && image_ptr + view_size <= orig_image_ptr + tex->get_ram_mipmap_image_size(n), false); - - if (image_compression == Texture::CM_off) { - // If the GL doesn't claim to support BGR, we may have to reverse - // the component ordering of the image. - image_ptr = fix_component_ordering(bgr_image, image_ptr, view_size, - external_format, tex); + if (needs_reload && _supports_texture_max_level) { + // Tell the GL we have no more mipmaps for it to use. + glTexParameteri(target, GL_TEXTURE_MAX_LEVEL, level - 1); } + break; } - int width = tex->get_expected_mipmap_x_size(n); - int height = tex->get_expected_mipmap_y_size(n); - -#ifdef DO_PSTATS - _data_transferred_pcollector.add_level(page_size * pages.get_num_on_bits()); -#endif - switch (target) { -#ifndef OPENGLES_1 - case GL_TEXTURE_3D: - if (_supports_3d_texture) { - for (size_t sri = 0; sri < pages.get_num_subranges(); ++sri) { - int begin = pages.get_subrange_begin(sri); - int num_pages = pages.get_subrange_end(sri) - begin; - const unsigned char *page_ptr = image_ptr + page_size * begin; - - if (image_compression == Texture::CM_off) { - _glTexSubImage3D(target, n - mipmap_bias, - 0, 0, begin, width, height, num_pages, - external_format, component_type, page_ptr); - } else { - _glCompressedTexSubImage3D(target, n - mipmap_bias, - 0, 0, begin, width, height, num_pages, - external_format, - page_size * num_pages, page_ptr); + if (tex->has_clear_color()) { + // The texture has a clear color, so we should fill this mipmap + // level to a solid color. +#ifndef OPENGLES + if (target != GL_TEXTURE_BUFFER) { + if (_supports_clear_texture && !needs_reload) { + // We can do that with the convenient glClearTexImage + // function. 
+ vector_uchar clear_data = tex->get_clear_data(); + + if (pages.has_all_of(0, depth)) { + _glClearTexImage(index, level, external_format, + component_type, (void *)&clear_data[0]); } + else for (size_t sri = 0; sri < pages.get_num_subranges(); ++sri) { + int begin = pages.get_subrange_begin(sri); + int num_pages = pages.get_subrange_end(sri) - begin; + _glClearTexSubImage(index, level, 0, 0, begin, + width, height, num_pages, external_format, + component_type, (void *)&clear_data[0]); + } + continue; } } else { - report_my_gl_errors(); - return false; + if (_supports_clear_buffer && !needs_reload) { + // For buffer textures we need to clear the underlying + // storage. + vector_uchar clear_data = tex->get_clear_data(); + + _glClearBufferData(GL_TEXTURE_BUFFER, internal_format, external_format, + component_type, (const void *)&clear_data[0]); + continue; + } } +#endif // OPENGLES + // Ask the Texture class to create the mipmap level in RAM. It'll + // fill it in with the correct clear color, which we can then + // upload. + ptimage = tex->make_ram_mipmap_image(n); + image_ptr = ptimage; + } + else if (!needs_reload) { + // No clear color and no more images, and no storage to create. break; -#endif // OPENGLES_1 - -#ifndef OPENGLES - case GL_TEXTURE_1D: - if (image_compression == Texture::CM_off) { - glTexSubImage1D(target, n - mipmap_bias, 0, width, - external_format, component_type, image_ptr); - } else { - _glCompressedTexSubImage1D(target, n - mipmap_bias, 0, width, - external_format, page_size, image_ptr); + } + else if (compressed) { + // We can't upload a NULL compressed texture. + if (_supports_texture_max_level) { + // Tell the GL we have no more mipmaps for it to use. + glTexParameteri(target, GL_TEXTURE_MAX_LEVEL, level - 1); } break; -#endif // OPENGLES + } + } -#ifndef OPENGLES_1 - case GL_TEXTURE_2D_ARRAY: - case GL_TEXTURE_CUBE_MAP_ARRAY: - if (_supports_2d_texture_array) { - for (size_t sri = 0; sri < pages.get_num_subranges(); ++sri) { - int begin = pages.get_subrange_begin(sri); - int num_pages = pages.get_subrange_end(sri) - begin; - const unsigned char *page_ptr = image_ptr + page_size * begin; - - if (image_compression == Texture::CM_off) { - _glTexSubImage3D(target, n - mipmap_bias, - 0, 0, begin, width, height, num_pages, - external_format, component_type, page_ptr); - } else { - _glCompressedTexSubImage3D(target, n - mipmap_bias, - 0, 0, begin, width, height, num_pages, - external_format, - page_size * num_pages, page_ptr); - } - } - } else { - report_my_gl_errors(); - return false; + // Select the correct view. + size_t orig_view_size = 0; + size_t orig_page_size = 0; + size_t page_size = 0; + if (image_ptr != nullptr) { + orig_view_size = tex->get_ram_mipmap_view_size(n); + if (view > 0) { + const unsigned char *orig_image_ptr = image_ptr; + image_ptr += orig_view_size * view; + nassertd(image_ptr >= orig_image_ptr && image_ptr + orig_view_size <= orig_image_ptr + tex->get_ram_mipmap_image_size(n)) { + success = false; + break; } - break; -#endif // OPENGLES_1 + } + orig_page_size = tex->get_ram_mipmap_page_size(n); + page_size = orig_page_size; + if (!compressed) { + // May need to convert. 
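+        // The data may be repacked to match the external format's component
+        // count during upload, so recompute the per-page byte size accordingly.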
+ page_size = get_external_format_components(external_format) * (page_size / num_components); + } + } #ifndef OPENGLES_1 - case GL_TEXTURE_BUFFER: - if (_supports_buffer_texture) { - _glBufferSubData(GL_TEXTURE_BUFFER, 0, page_size, image_ptr); - } else { - report_my_gl_errors(); - return false; + else if (target == GL_TEXTURE_BUFFER) { + // page_size for buffer texture indicates the size even for a null image. + page_size = tex->get_expected_ram_mipmap_view_size(n); + } +#endif + + // Don't need to update the padded area at the bottom. + int sub_height = height; + if (n == 0 && (target == GL_TEXTURE_2D || target == GL_TEXTURE_CUBE_MAP)) { + sub_height -= tex->get_pad_y_size(); + } + +#if defined(HAVE_THREADS) && !defined(OPENGLES_1) + if (mapped_ptr != nullptr) { + // Let's make sure we have texture storage (normally this is handled by + // the glTexStorage2D calls above, if immutable texture storage is + // supported and enabled), it makes other things easier down the line. + if (needs_reload) { + PStatTimer timer(_create_texture_storage_pcollector); + if (!upload_texture_level(true, compressed, target, level, + width, height, depth, internal_format, + external_format, component_type, + nullptr, page_size, pages, usage)) { + if (level == 0) { + // If level 0 failed to create, this texture is useless. + success = false; + } + else if (_supports_texture_max_level) { + // Apparently, this is all it's going to get. + glTexParameteri(target, GL_TEXTURE_MAX_LEVEL, level - 1); + } + break; } - break; -#endif // OPENGLES + } - case GL_TEXTURE_CUBE_MAP: - if (_supports_cube_map) { - // This is the only texture type that must be specified using separate - // per-page calls. - if (n == 0) { - height = tex->get_y_size() - tex->get_pad_y_size(); + // Spawn a task to do the upload asynchronously into the PBO. + if (image_ptr != nullptr) { + void *mapped_level_ptr = (char *)mapped_ptr + pbo_offset; + + if (ptimage) { + ptimage.node_ref(); + } + _async_chain->add([=, ptimage = std::move(ptimage), token = counter.make_token()](AsyncTask *task) mutable { + { + PStatTimer timer(_load_texture_copy_pcollector); + copy_image((unsigned char *)mapped_level_ptr, image_ptr, orig_view_size, + external_format, num_components, component_width); } - for (int z = 0; z < 6; ++z) { - if (pages.get_bit(z)) { - GLenum page_target = GL_TEXTURE_CUBE_MAP_POSITIVE_X + z; - const unsigned char *page_ptr = image_ptr + page_size * z; - - if (image_compression == Texture::CM_off) { - glTexSubImage2D(page_target, n - mipmap_bias, 0, 0, width, height, - external_format, component_type, page_ptr); - } else { - _glCompressedTexSubImage2D(page_target, n - mipmap_bias, - 0, 0, width, height, - external_format, page_size, page_ptr); - } - } + if (ptimage) { + ptimage.node_unref(); } - } else { - report_my_gl_errors(); - return false; + + token.complete(true); + return AsyncTask::DS_done; + }, "copy:" + tex->get_name()); + + if (async_levels == nullptr) { + async_levels = new AsyncLevel[num_levels + 1]; } + async_levels[num_async_levels++] = {width, sub_height, depth, page_size, pbo_offset, std::move(pages)}; + pbo_offset += page_size * depth; + continue; + } + } +#endif + if (image_ptr != nullptr) { + if (page_size != orig_page_size || external_format == GL_RGBA || external_format == GL_RGB) { + // If the GL doesn't claim to support BGR, we may have to reverse + // the component ordering of the image. 
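+        // (Panda keeps its RAM images in BGR(A) order; if the GL gave us a plain
+        // GL_RGB/GL_RGBA external format, or the page size changed above, copy
+        // the data into a converted buffer before uploading.)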
+ PStatTimer timer(_load_texture_copy_pcollector); + PTA_uchar new_image = PTA_uchar::empty_array(page_size * depth); + copy_image(&new_image[0], image_ptr, orig_view_size, + external_format, num_components, component_width); + + ptimage = std::move(new_image); + } + } + + // Try updating the existing storage (sub-loading) first. + if (!needs_reload) { + if (!upload_texture_level(false, compressed, target, level, + width, sub_height, depth, internal_format, + external_format, component_type, + image_ptr, page_size, pages, usage)) { break; + } - default: - if (image_compression == Texture::CM_off) { - if (n == 0) { - // It's unfortunate that we can't adjust the width, too, but - // TexSubImage2D doesn't accept a row-stride parameter. - height = tex->get_y_size() - tex->get_pad_y_size(); - } - glTexSubImage2D(target, n - mipmap_bias, 0, 0, width, height, - external_format, component_type, image_ptr); - } else { - _glCompressedTexSubImage2D(target, n - mipmap_bias, 0, 0, width, height, - external_format, page_size, image_ptr); + // Did that fail? If it did, we'll immediately try again, this time + // loading the texture from scratch. + GLenum error_code = gl_get_error(); + if (error_code != GL_NO_ERROR) { + if (GLCAT.is_warning()) { + GLCAT.warning() + << "GL texture subload failed for " << tex->get_name() + << " level " << level << ": " << get_error_string(error_code) << "\n"; } - break; + needs_reload = true; } } - // Did that fail? If it did, we'll immediately try again, this time - // loading the texture from scratch. - GLenum error_code = gl_get_error(); - if (error_code != GL_NO_ERROR) { - if (GLCAT.is_debug()) { - GLCAT.debug() - << "GL texture subload failed for " << tex->get_name() - << " : " << get_error_string(error_code) << "\n"; + if (needs_reload) { + if (!upload_texture_level(true, compressed, target, level, + width, height, depth, internal_format, + external_format, component_type, + image_ptr, page_size, pages, usage)) { + + if (_supports_texture_max_level) { + // Apparently, this is all it's going to get. + glTexParameteri(target, GL_TEXTURE_MAX_LEVEL, level); + } + if (level == 0) { + success = false; + } + break; } - needs_reload = true; } } + // Purely synchronous path, we can finish up the creation now. + // Report the error message explicitly if the GL texture creation failed. if (needs_reload) { - // Load the image up from scratch, creating a new GL Texture object. - if (GLCAT.is_debug()) { - GLCAT.debug() - << "loading new texture object for " << tex->get_name() << " view " - << view << ", " << width << " x " << height << " x " << depth - << ", mipmaps " << num_ram_mipmap_levels << " / " << num_levels << "\n"; + GLenum error_code = gl_get_error(); + if (error_code != GL_NO_ERROR) { + GLCAT.error() + << "GL texture creation failed for " << tex->get_name() + << " : " << get_error_string(error_code) << "\n"; + + gtc->_has_storage = false; + success = false; } + } - // If there is immutable storage, this is impossible to do, and we should - // not have gotten here at all. - nassertr(!gtc->_immutable, false); +#if defined(HAVE_THREADS) && !defined(OPENGLES_1) + if (async_levels != nullptr) { + // Schedule a follow-up task to finish the upload, which needs to happen + // with bound context, so we use a special mini job queue for that. - if (num_ram_mipmap_levels == 0) { - if (GLCAT.is_debug()) { - GLCAT.debug() - << " (initializing NULL image)\n"; - } + // Storing 0 as last item saves some closure space. 
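+    // A zero width acts as the sentinel that ends the level loop in the
+    // completion callback below.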
+ async_levels[num_async_levels] = {0}; - if ((external_format == GL_DEPTH_STENCIL) && get_supports_depth_stencil()) { -#ifdef OPENGLES - component_type = GL_UNSIGNED_INT_24_8_OES; -#else - component_type = GL_UNSIGNED_INT_24_8_EXT; -#endif - } - } - - for (int n = mipmap_bias; n < num_levels + mipmap_bias; ++n) { - const unsigned char *image_ptr = (unsigned char*)tex->get_ram_mipmap_pointer(n); - CPTA_uchar ptimage; - if (image_ptr == nullptr) { - ptimage = tex->get_ram_mipmap_image(n); - if (ptimage.is_null()) { - if (n - mipmap_bias < num_ram_mipmap_levels) { - // We were told we'd have this many RAM mipmap images, but we - // don't. Raise a warning. - GLCAT.warning() - << "No mipmap level " << n << " defined for " << tex->get_name() - << "\n"; - if (_supports_texture_max_level) { - // Tell the GL we have no more mipmaps for it to use. - glTexParameteri(target, GL_TEXTURE_MAX_LEVEL, n - mipmap_bias); + std::move(counter).then([ + this, token = std::move(token), + async_levels, gtc, view, pbo, pbo_size, upload_count, + external_format, component_type, compressed + ] (bool success) mutable { + call_later([=, token = std::move(token)] () mutable { + _glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo); + _glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER); + + if (gtc->_uploads_finished - upload_count >= 0) { + // Updates arrived out of order, so we skip this one, since a newer + // update was already finished. + GLCAT.info() + << "Discarding async update #" << upload_count << " to texture " + << gtc->get_texture()->get_name() << "\n"; + success = false; + } + else if (view >= gtc->_num_views) { + // If we uploaded a view that is no longer needed, we silently + // consider it a success, even if the task failed. + success = true; + } + else if (!apply_texture(gtc, view)) { + success = false; + } + else if (success) { + PStatTimer timer(_load_texture_pcollector); + + if (gtc->_target == GL_TEXTURE_BUFFER) { + // We can use a trick for buffer textures: just swap the "PBO" with + // the existing texture storage. The existing storage becomes the + // new PBO. Note also that buffer textures have no mipmaps. + _glTexBuffer(GL_TEXTURE_BUFFER, gtc->_internal_format, pbo); + std::swap(pbo, gtc->_buffers[view]); + } + else { + for (int level = 0; async_levels[level].width != 0; ++level) { + AsyncLevel &data = async_levels[level]; + if (!upload_texture_level(false, compressed, gtc->_target, level, + data.width, data.height, data.depth, + gtc->_internal_format, external_format, + component_type, (unsigned char *)data.pbo_offset, + data.page_size, data.pages, GL_STATIC_DRAW)) { + if (_supports_texture_max_level && !gtc->_generate_mipmaps) { + // Apparently, this is all it's going to get. + glTexParameteri(gtc->_target, GL_TEXTURE_MAX_LEVEL, level - 1); + } + success = false; + break; + } } - break; } - if (tex->has_clear_color()) { - // Ask the Texture class to create the mipmap level in RAM. It'll - // fill it in with the correct clear color, which we can then - // upload. - ptimage = tex->make_ram_mipmap_image(n); + if (success) { + gtc->_uploads_finished = upload_count; } - else if (image_compression != Texture::CM_off) { - // We can't upload a NULL compressed texture. - if (_supports_texture_max_level) { - // Tell the GL we have no more mipmaps for it to use. - glTexParameteri(target, GL_TEXTURE_MAX_LEVEL, n - mipmap_bias); + + if (gtc->_generate_mipmaps && _glGenerateMipmap != nullptr) { + // We uploaded an image; we may need to generate mipmaps. 
+ if (GLCAT.is_debug()) { + GLCAT.debug() + << "generating mipmaps for texture " << gtc->get_texture()->get_name() + << " view " << view << ", " << async_levels[0].width << " x " + << async_levels[0].height << " x " << async_levels[0].depth + << ", mipmaps = " << gtc->_num_levels + << " (async update #" << upload_count << ")\n"; + } + _glGenerateMipmap(gtc->_target); + } + + if (success && gtc->get_texture()->get_post_load_store_cache()) { + if (!do_extract_texture_data(gtc, view)) { + success = false; } - break; } } - image_ptr = ptimage; + _glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); + + // This goes back into the pool. + gtc->return_pbo(pbo, pbo_size); + + token.complete(success); + + delete[] async_levels; + }); + }); + } else +#endif + { + // This ensures any pending async op will not overwrite what we just did. + gtc->_uploads_finished = upload_count; + + if (pbo != 0) { + // For whatever reason, we haven't used the PBO, so return it. + gtc->return_pbo(pbo, pbo_size); + } + + if (gtc->_generate_mipmaps && _glGenerateMipmap != nullptr && !image.is_null()) { + // We uploaded an image; we may need to generate mipmaps. + if (GLCAT.is_debug()) { + GLCAT.debug() + << "generating mipmaps for texture " << gtc->get_texture()->get_name() << " view " + << view << ", " << width << " x " << height << " x " << depth + << ", mipmaps = " << num_levels << "\n"; } + _glGenerateMipmap(gtc->_target); + } - PTA_uchar bgr_image; - size_t view_size = tex->get_ram_mipmap_view_size(n); - if (image_ptr != nullptr) { - const unsigned char *orig_image_ptr = image_ptr; - image_ptr += view_size * view; - nassertr(image_ptr >= orig_image_ptr && image_ptr + view_size <= orig_image_ptr + tex->get_ram_mipmap_image_size(n), false); - - if (image_compression == Texture::CM_off) { - // If the GL doesn't claim to support BGR, we may have to reverse - // the component ordering of the image. - image_ptr = fix_component_ordering(bgr_image, image_ptr, view_size, - external_format, tex); - } + if (gtc->get_texture()->get_post_load_store_cache()) { + if (!do_extract_texture_data(gtc, view)) { + success = false; } + } - int width = tex->get_expected_mipmap_x_size(n); - int height = tex->get_expected_mipmap_y_size(n); -#ifndef OPENGLES_1 - int depth = tex->get_expected_mipmap_z_size(n); -#endif + token.complete(success); + } -#ifdef DO_PSTATS - _data_transferred_pcollector.add_level(view_size); -#endif - switch (target) { -#ifndef OPENGLES // 1-d textures not supported by OpenGL ES. Fall through. - case GL_TEXTURE_1D: - if (image_compression == Texture::CM_off) { - glTexImage1D(target, n - mipmap_bias, internal_format, - width, 0, external_format, component_type, image_ptr); - } else { - _glCompressedTexImage1D(target, n - mipmap_bias, external_format, - width, 0, view_size, image_ptr); - } - break; -#endif // OPENGLES // OpenGL ES will fall through. + report_my_gl_errors(); -#ifndef OPENGLES_1 - case GL_TEXTURE_3D: - if (_supports_3d_texture) { - if (image_compression == Texture::CM_off) { - _glTexImage3D(target, n - mipmap_bias, internal_format, - width, height, depth, 0, - external_format, component_type, image_ptr); - } else { - _glCompressedTexImage3D(target, n - mipmap_bias, external_format, - width, height, depth, 0, view_size, image_ptr); - } - } else { - report_my_gl_errors(); - return false; - } - break; -#endif // OPENGLES_1 + return success; +} +/** + * Performs the actual OpenGL call to update the texture data for the given + * mipmap level (be sure to subtract the mipmap_bias before passing it in). 
+ * + * If full_reload is true, recreates the texture storage, otherwise subloads + * into the existing texture storage. A texture storage with undefined + * contents can be created by setting image_ptr to nullptr, in which case + * compressed must be false. + * + * Returns true if this texture format was supported, false otherwise. + */ +bool CLP(GraphicsStateGuardian):: +upload_texture_level(bool full_reload, bool compressed, GLenum target, + int level, int width, int height, int depth, + GLint internal_format, GLint external_format, + GLenum component_type, const unsigned char *image_ptr, + size_t page_size, SparseArray pages, + GLenum usage_hint) { + + switch (target) { #ifndef OPENGLES_1 - case GL_TEXTURE_2D_ARRAY: - case GL_TEXTURE_CUBE_MAP_ARRAY: - if (_supports_2d_texture_array) { - if (image_compression == Texture::CM_off) { - _glTexImage3D(target, n - mipmap_bias, internal_format, - width, height, depth, 0, - external_format, component_type, image_ptr); - } else { - _glCompressedTexImage3D(target, n - mipmap_bias, external_format, - width, height, depth, 0, view_size, image_ptr); - } - } else { - report_my_gl_errors(); - return false; - } - break; + case GL_TEXTURE_3D: + if (!_supports_3d_texture) { + return false; + } - case GL_TEXTURE_BUFFER: - if (_supports_buffer_texture) { - _glBufferData(GL_TEXTURE_BUFFER, view_size, image_ptr, - get_usage(tex->get_usage_hint())); + if (full_reload) { + if (!compressed) { + _glTexImage3D(target, level, internal_format, + width, height, depth, 0, + external_format, component_type, image_ptr); + } else { + _glCompressedTexImage3D(target, level, external_format, + width, height, depth, 0, page_size * depth, image_ptr); + } + } else { + for (size_t sri = 0; sri < pages.get_num_subranges(); ++sri) { + int begin = pages.get_subrange_begin(sri); + int num_pages = pages.get_subrange_end(sri) - begin; + const unsigned char *page_ptr = image_ptr + page_size * begin; + + if (!compressed) { + _glTexSubImage3D(target, level, + 0, 0, begin, width, height, num_pages, + external_format, component_type, page_ptr); } else { - report_my_gl_errors(); - return false; + _glCompressedTexSubImage3D(target, level, + 0, 0, begin, width, height, num_pages, + external_format, + page_size * num_pages, page_ptr); } - break; + } + } + break; #endif // OPENGLES_1 - case GL_TEXTURE_CUBE_MAP: - if (_supports_cube_map) { - // This is the only texture type that must be specified using separate - // per-page calls. - size_t page_size = tex->get_ram_mipmap_page_size(n); - for (int z = 0; z < 6; ++z) { - GLenum page_target = GL_TEXTURE_CUBE_MAP_POSITIVE_X + z; - const unsigned char *page_ptr = - (image_ptr != nullptr) ? 
image_ptr + page_size * z : nullptr; - - if (image_compression == Texture::CM_off) { - glTexImage2D(page_target, n - mipmap_bias, internal_format, - width, height, 0, +#ifndef OPENGLES_1 + case GL_TEXTURE_2D_ARRAY: + case GL_TEXTURE_CUBE_MAP_ARRAY: + if (!_supports_2d_texture_array) { + return false; + } + + if (full_reload) { + if (!compressed) { + _glTexImage3D(target, level, internal_format, width, height, depth, 0, + external_format, component_type, image_ptr); + } else { + _glCompressedTexImage3D(target, level, external_format, + width, height, depth, 0, + page_size * depth, image_ptr); + } + } else { + for (size_t sri = 0; sri < pages.get_num_subranges(); ++sri) { + int begin = pages.get_subrange_begin(sri); + int num_pages = pages.get_subrange_end(sri) - begin; + const unsigned char *page_ptr = image_ptr + page_size * begin; + + if (!compressed) { + _glTexSubImage3D(target, level, + 0, 0, begin, width, height, num_pages, external_format, component_type, page_ptr); - } else { - _glCompressedTexImage2D(page_target, n - mipmap_bias, external_format, - width, height, 0, page_size, page_ptr); - } - } } else { - report_my_gl_errors(); - return false; + _glCompressedTexSubImage3D(target, level, + 0, 0, begin, width, height, num_pages, + external_format, + page_size * num_pages, page_ptr); } - break; + } + } + break; +#endif // !OPENGLES_1 - default: - if (image_compression == Texture::CM_off) { - glTexImage2D(target, n - mipmap_bias, internal_format, - width, height, 0, - external_format, component_type, image_ptr); +#ifndef OPENGLES_1 + case GL_TEXTURE_BUFFER: + if (!_supports_buffer_texture) { + return false; + } + + if (full_reload) { + _glBufferData(GL_TEXTURE_BUFFER, page_size, image_ptr, usage_hint); + } else { + _glBufferSubData(GL_TEXTURE_BUFFER, 0, page_size, image_ptr); + } + break; +#endif // !OPENGLES_1 + + case GL_TEXTURE_CUBE_MAP: + if (!_supports_cube_map) { + return false; + } + + // This is the only texture type that must be specified using separate + // per-page calls. + if (full_reload) { + for (int z = 0; z < 6; ++z) { + GLenum page_target = GL_TEXTURE_CUBE_MAP_POSITIVE_X + z; + const unsigned char *page_ptr = + (image_ptr != nullptr) ? image_ptr + page_size * z : nullptr; + + if (!compressed) { + glTexImage2D(page_target, level, internal_format, width, height, 0, + external_format, component_type, page_ptr); } else { - _glCompressedTexImage2D(target, n - mipmap_bias, external_format, - width, height, 0, view_size, image_ptr); + _glCompressedTexImage2D(page_target, level, external_format, + width, height, 0, page_size, page_ptr); + } + } + } else { + for (int z = 0; z < 6; ++z) { + if (pages.get_bit(z)) { + GLenum page_target = GL_TEXTURE_CUBE_MAP_POSITIVE_X + z; + const unsigned char *page_ptr = image_ptr + page_size * z; + + if (!compressed) { + glTexSubImage2D(page_target, level, 0, 0, width, height, + external_format, component_type, page_ptr); + } else { + _glCompressedTexSubImage2D(page_target, level, + 0, 0, width, height, + external_format, page_size, page_ptr); + } } } } + break; - // Report the error message explicitly if the GL texture creation failed. 
- GLenum error_code = gl_get_error(); - if (error_code != GL_NO_ERROR) { - GLCAT.error() - << "GL texture creation failed for " << tex->get_name() - << " : " << get_error_string(error_code) << "\n"; +#ifndef OPENGLES + case GL_TEXTURE_1D: + if (full_reload) { + if (!compressed) { + glTexImage1D(target, level, internal_format, + width, 0, external_format, component_type, image_ptr); + } else { + _glCompressedTexImage1D(target, level, external_format, + width, 0, page_size, image_ptr); + } + } else { + if (!compressed) { + glTexSubImage1D(target, level, 0, width, + external_format, component_type, image_ptr); + } else { + _glCompressedTexSubImage1D(target, level, 0, width, + external_format, page_size, image_ptr); + } + } + break; +#endif // !OPENGLES - gtc->_has_storage = false; - return false; + default: + if (full_reload) { + if (!compressed) { + glTexImage2D(target, level, internal_format, width, height, 0, + external_format, component_type, image_ptr); + } else { + _glCompressedTexImage2D(target, level, external_format, + width, height, 0, page_size, image_ptr); + } + } else { + if (!compressed) { + glTexSubImage2D(target, level, 0, 0, width, height, + external_format, component_type, image_ptr); + } else { + _glCompressedTexSubImage2D(target, level, 0, 0, width, height, + external_format, page_size, image_ptr); + } } + break; + } + +#ifdef DO_PSTATS + if (full_reload) { + _data_transferred_pcollector.add_level(page_size * depth); + } else { + _data_transferred_pcollector.add_level(pages.get_num_on_bits() * depth); } +#endif - if (gtc->_generate_mipmaps && _glGenerateMipmap != nullptr && !image.is_null()) { - // We uploaded an image; we may need to generate mipmaps. + // Did that fail? If it did, we'll immediately try again, this time + // loading the texture from scratch. + /*GLenum error_code = gl_get_error(); + if (error_code != GL_NO_ERROR) { if (GLCAT.is_debug()) { GLCAT.debug() - << "generating mipmaps for texture " << tex->get_name() << " view " - << view << ", " << width << " x " << height << " x " << depth - << ", mipmaps = " << num_levels << "\n"; + << "GL texture subload failed for " << tex->get_name() + << " : " << get_error_string(error_code) << "\n"; } - _glGenerateMipmap(target); - } - - report_my_gl_errors(); - + full_reload = true; + }*/ return true; } @@ -15171,7 +15591,7 @@ do_extract_texture_data(CLP(TextureContext) *gtc, int view) { #ifndef OPENGLES_1 // Make sure any incoherent writes to the texture have been synced. - if (gtc->needs_barrier(GL_TEXTURE_UPDATE_BARRIER_BIT)) { + if (gtc->needs_barrier(GL_TEXTURE_UPDATE_BARRIER_BIT, false)) { issue_memory_barrier(GL_TEXTURE_UPDATE_BARRIER_BIT); } #endif @@ -16174,3 +16594,160 @@ do_issue_scissor() { } } } + +#ifndef OPENGLES_1 +/** + * Maps a buffer for reading. May be temporarily bound to the given target. 
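+ * Returns a pointer to the mapped memory, or nullptr if the mapping failed.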
+ */ +void *CLP(GraphicsStateGuardian):: +map_read_buffer(GLenum target, GLuint buffer, size_t size) { + nassertr(buffer != 0, nullptr); + +#ifndef OPENGLES + if (_glMapNamedBufferRange != nullptr) { + return _glMapNamedBufferRange(buffer, 0, size, GL_MAP_READ_BIT); + } +#endif + + void *mapped_ptr = nullptr; + + _glBindBuffer(target, buffer); +#ifdef OPENGLES + // There is neither glMapBuffer nor persistent mapping in OpenGL ES + mapped_ptr = _glMapBufferRange(target, 0, size, GL_MAP_READ_BIT); +#else + // If we get here in desktop GL, we must not have persistent mapping + mapped_ptr = _glMapBuffer(target, GL_READ_ONLY); +#endif + + _glBindBuffer(target, 0); + return mapped_ptr; +} + +/** + * Maps a buffer as write-only, discarding the previous contents. If + * create_storage is true, allocates new storage for the buffer. May use the + * given target to temporarily bind the buffer, if DSA is not supported. + */ +void *CLP(GraphicsStateGuardian):: +map_write_discard_buffer(GLenum target, GLuint buffer, size_t size, + bool create_storage) { + nassertr(buffer != 0, nullptr); + +#ifndef OPENGLES + if (!create_storage && _glMapNamedBufferRange != nullptr) { + return _glMapNamedBufferRange(buffer, 0, size, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT); + } +#endif + + _glBindBuffer(target, buffer); + + void *mapped_ptr; + if (_glMapBufferRange != nullptr) { + if (create_storage) { + if (_supports_buffer_storage) { + _glBufferStorage(target, size, nullptr, GL_MAP_WRITE_BIT); + } else { + _glBufferData(target, size, nullptr, GL_STATIC_DRAW); + } + } + mapped_ptr = _glMapBufferRange(target, 0, size, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT); + } else { +#ifdef OPENGLES + mapped_ptr = nullptr; +#else + // Explicitly orphan the buffer before mapping. + _glBufferData(target, size, nullptr, GL_STATIC_DRAW); + mapped_ptr = _glMapBuffer(target, GL_WRITE_ONLY); +#endif + } + + _glBindBuffer(target, 0); + return mapped_ptr; +} +#endif // !OPENGLES_1 + +#ifndef OPENGLES_1 +/** + * Inserts a fence into the command stream. + */ +void CLP(GraphicsStateGuardian):: +insert_fence(CompletionToken &&callback) { + GLsync fence = _glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); + _fences.push_back({fence, std::move(callback)}); +} + +/** + * Checks which fences are finished and processes those. + */ +void CLP(GraphicsStateGuardian):: +process_fences(bool force) { + if (_fences.empty()) { + return; + } + + PStatTimer timer(_copy_texture_finish_pcollector); + + if (force) { + // Just wait for the last fence, the rest must be complete too then. + PStatTimer timer(_wait_fence_pcollector); + GLsync fence = _fences.back()._object; + _glClientWaitSync(fence, 0, (GLuint64)-1); + } + + while (!_fences.empty()) { + Fence &fence = _fences.front(); + if (!force) { + GLenum result = _glClientWaitSync(fence._object, 0, 0); + if (result != GL_ALREADY_SIGNALED && result != GL_CONDITION_SATISFIED) { + // Not yet done. The rest must not yet be done then, either. + break; + } + } + _glDeleteSync(fence._object); + + std::move(fence._token).complete(true); + _fences.pop_front(); + + // If there is 1 remaining, save it for next frame. This helps prevent an + // inconsistent frame rate when the number of fetched frames alternates + // between 0 and 2, which can settle into a stable feedback loop. + if (!force && _fences.size() == 1) { + break; + } + } +} +#endif // !OPENGLES_1 + +/** + * Adds a job to the queue to be processed later while the context is bound, + * useful for calling from other threads. 
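+ * The job will be picked up by the next call to process_pending_jobs() on the
+ * draw thread.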
+ */ +void CLP(GraphicsStateGuardian):: +call_later(Completable &&job) { + MutexHolder holder(_job_queue_mutex); + _job_queue.push_back(std::move(job)); + _job_queue_cvar.notify(); +} + +/** + * Processes any pending jobs from the queue. If wait is true, waits for at + * least one job if the queue is empty. + * + * May only be called on the draw thread. + */ +void CLP(GraphicsStateGuardian):: +process_pending_jobs(bool wait) { + JobQueue jobs; + { + MutexHolder holder(_job_queue_mutex); + if (wait && _job_queue.empty()) { + _job_queue_cvar.wait(); + } + _job_queue.swap(jobs); + } + + for (auto &job : jobs) { + std::move(job)(); + } +} diff --git a/panda/src/glstuff/glGraphicsStateGuardian_src.h b/panda/src/glstuff/glGraphicsStateGuardian_src.h index 03f7a0097d7..a355962005e 100644 --- a/panda/src/glstuff/glGraphicsStateGuardian_src.h +++ b/panda/src/glstuff/glGraphicsStateGuardian_src.h @@ -39,6 +39,8 @@ #include "geomVertexArrayData.h" #include "lightMutex.h" #include "pStatGPUTimer.h" +#include "completionToken.h" +#include "asyncTaskChain.h" class PlaneNode; class Light; @@ -230,6 +232,7 @@ typedef void (APIENTRYP PFNGLGETPROGRAMBINARYPROC) (GLuint program, GLsizei bufS typedef void (APIENTRYP PFNGLPROGRAMBINARYPROC) (GLuint program, GLenum binaryFormat, const void *binary, GLsizei length); typedef void (APIENTRYP PFNGLGETINTERNALFORMATIVPROC) (GLenum target, GLenum internalformat, GLenum pname, GLsizei bufSize, GLint *params); typedef void (APIENTRYP PFNGLBUFFERSTORAGEPROC) (GLenum target, GLsizeiptr size, const void *data, GLbitfield flags); +typedef void (APIENTRYP PFNGLCOPYBUFFERSUBDATAPROC) (GLenum readTarget, GLenum writeTarget, GLintptr readOffset, GLintptr writeOffset, GLsizeiptr size); typedef void (APIENTRYP PFNGLBINDIMAGETEXTUREPROC) (GLuint unit, GLuint texture, GLint level, GLboolean layered, GLint layer, GLenum access, GLenum format); typedef void (APIENTRYP PFNGLCLEARTEXIMAGEPROC) (GLuint texture, GLint level, GLenum format, GLenum type, const void *data); typedef void (APIENTRYP PFNGLCLEARTEXSUBIMAGEPROC) (GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, const void *data); @@ -345,7 +348,8 @@ class EXPCL_GL CLP(GraphicsStateGuardian) : public GraphicsStateGuardian { #endif virtual TextureContext *prepare_texture(Texture *tex); - virtual bool update_texture(TextureContext *tc, bool force); + virtual bool update_texture(TextureContext *tc, bool force, + CompletionToken token = CompletionToken()); virtual void release_texture(TextureContext *tc); virtual void release_textures(const pvector &contexts); virtual bool extract_texture_data(Texture *tex); @@ -419,7 +423,6 @@ class EXPCL_GL CLP(GraphicsStateGuardian) : public GraphicsStateGuardian { virtual bool framebuffer_copy_to_ram (Texture *tex, int view, int z, const DisplayRegion *dr, const RenderBuffer &rb, ScreenshotRequest *request); - void finish_async_framebuffer_ram_copies(bool force = false); #ifdef SUPPORT_FIXED_FUNCTION void apply_fog(Fog *fog); @@ -637,12 +640,21 @@ class EXPCL_GL CLP(GraphicsStateGuardian) : public GraphicsStateGuardian { bool apply_texture(CLP(TextureContext) *gtc, int view); bool apply_sampler(GLuint unit, const SamplerState &sampler, CLP(TextureContext) *gtc, int view); - bool upload_texture(CLP(TextureContext) *gtc, bool force, bool uses_mipmaps); - bool upload_texture_image(CLP(TextureContext) *gtc, int view, - bool needs_reload, int mipmap_bias, int num_levels, + bool 
upload_texture(CLP(TextureContext) *gtc, bool force, bool uses_mipmaps, + CompletionToken token = CompletionToken()); + bool upload_texture_view(CLP(TextureContext) *gtc, int view, + bool needs_reload, int mipmap_bias, int num_levels, + GLint internal_format, GLint external_format, + GLenum component_type, bool compressed, + int async_buffers, CompletionToken token); + bool upload_texture_level(bool full_reload, bool compressed, + GLenum target, int level, + int width, int height, int depth, GLint internal_format, GLint external_format, GLenum component_type, - Texture::CompressionMode image_compression); + const unsigned char *image_ptr, + size_t page_size, SparseArray pages, + GLenum usage_hint); void generate_mipmaps(CLP(TextureContext) *gtc); bool upload_simple_texture(CLP(TextureContext) *gtc); @@ -658,6 +670,20 @@ class EXPCL_GL CLP(GraphicsStateGuardian) : public GraphicsStateGuardian { void do_point_size(); #endif +#ifndef OPENGLES_1 + void *map_read_buffer(GLenum target, GLuint buffer, size_t size); + void *map_write_discard_buffer(GLenum target, GLuint buffer, size_t size, + bool create_storage); +#endif + +#ifndef OPENGLES_1 + void insert_fence(CompletionToken &&callback); + void process_fences(bool force); +#endif + + void call_later(Completable &&job); + void process_pending_jobs(bool wait); + enum AutoAntialiasMode { AA_poly, AA_line, @@ -904,6 +930,10 @@ class EXPCL_GL CLP(GraphicsStateGuardian) : public GraphicsStateGuardian { PFNGLGETBUFFERSUBDATAPROC _glGetBufferSubData; #endif +#ifndef OPENGLES_1 + PFNGLCOPYBUFFERSUBDATAPROC _glCopyBufferSubData; +#endif + #ifdef OPENGLES PFNGLMAPBUFFERRANGEEXTPROC _glMapBufferRange; PFNGLUNMAPBUFFEROESPROC _glUnmapBuffer; @@ -911,6 +941,10 @@ class EXPCL_GL CLP(GraphicsStateGuardian) : public GraphicsStateGuardian { PFNGLMAPBUFFERRANGEPROC _glMapBufferRange; #endif +#ifndef OPENGLES_1 + bool _supports_pixel_buffers; +#endif + #ifndef OPENGLES_1 bool _supports_uniform_buffers; bool _supports_shader_buffers; @@ -978,6 +1012,7 @@ class EXPCL_GL CLP(GraphicsStateGuardian) : public GraphicsStateGuardian { PFNGLTEXTUREPARAMETERIPROC _glTextureParameteri; PFNGLGENERATETEXTUREMIPMAPPROC _glGenerateTextureMipmap; PFNGLBINDTEXTUREUNITPROC _glBindTextureUnit; + PFNGLMAPNAMEDBUFFERRANGEPROC _glMapNamedBufferRange; #endif #ifndef OPENGLES_1 @@ -1162,12 +1197,14 @@ class EXPCL_GL CLP(GraphicsStateGuardian) : public GraphicsStateGuardian { #endif #ifndef OPENGLES_1 - // Stores textures for which memory bariers should be issued. - typedef pset TextureSet; - TextureSet _textures_needing_fetch_barrier; - TextureSet _textures_needing_image_access_barrier; - TextureSet _textures_needing_update_barrier; - TextureSet _textures_needing_framebuffer_barrier; + // This count increments every time the corresponding barrier is issued. + // GLTextureContext et al store copies of this counter, when a write is + // performed on a texture, it will set its counter to match the value on the + // GSG to indicate that it is out of sync and the barrier needs to be issued. 
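+  // As long as a context's stored value still equals the GSG counter, the write
+  // is unsynced and needs_barrier() returns true; issuing the barrier increments
+  // the GSG counter, which implicitly marks all outstanding writes as synced.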
+ int _texture_fetch_barrier_counter = 0; + int _shader_image_access_barrier_counter = 0; + int _texture_update_barrier_counter = 0; + int _framebuffer_barrier_counter = 0; int _shader_storage_barrier_counter = 0; #endif @@ -1218,16 +1255,21 @@ class EXPCL_GL CLP(GraphicsStateGuardian) : public GraphicsStateGuardian { FrameTiming *_current_frame_timing = nullptr; #endif - struct AsyncRamCopy { - PT(ScreenshotRequest) _request; - GLuint _pbo; - GLsync _fence; - GLuint _external_format; - int _view; - void *_mapped_pointer; - size_t _size; + struct Fence { + GLsync _object; + CompletionToken _token; }; - pdeque _async_ram_copies; + pdeque _fences; + +#ifdef HAVE_THREADS + AsyncTaskChain *_async_chain; +#endif + + // Min job system pending a real job system + typedef pvector JobQueue; + Mutex _job_queue_mutex; + ConditionVar _job_queue_cvar; + JobQueue _job_queue; BufferResidencyTracker _renderbuffer_residency; @@ -1272,6 +1314,7 @@ class EXPCL_GL CLP(GraphicsStateGuardian) : public GraphicsStateGuardian { friend class CLP(BufferContext); friend class CLP(ShaderContext); friend class CLP(CgShaderContext); + friend class CLP(TextureContext); friend class CLP(GraphicsBuffer); friend class CLP(OcclusionQueryContext); }; diff --git a/panda/src/glstuff/glShaderContext_src.cxx b/panda/src/glstuff/glShaderContext_src.cxx index dde88cc5546..f928882d2f4 100644 --- a/panda/src/glstuff/glShaderContext_src.cxx +++ b/panda/src/glstuff/glShaderContext_src.cxx @@ -2813,12 +2813,6 @@ update_shader_texture_bindings(ShaderContext *prev) { int view = _glgsg->get_current_tex_view_offset(); gl_tex = gtc->get_view_index(view); - -#ifndef OPENGLES - if (gtc->needs_barrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT)) { - barriers |= GL_SHADER_IMAGE_ACCESS_BARRIER_BIT; - } -#endif } } input._writable = false; @@ -2879,7 +2873,17 @@ update_shader_texture_bindings(ShaderContext *prev) { access = GL_READ_ONLY; gl_tex = 0; } + } else { + // If no parameters were specified, we have to assume writable access. + input._writable = true; } + +#ifndef OPENGLES + if (gtc->needs_barrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT, input._writable)) { + barriers |= GL_SHADER_IMAGE_ACCESS_BARRIER_BIT; + } +#endif + _glgsg->_glBindImageTexture(i, gl_tex, bind_level, layered, bind_layer, access, gtc->_internal_format); } @@ -2969,7 +2973,7 @@ update_shader_texture_bindings(ShaderContext *prev) { #ifndef OPENGLES // If it was recently written to, we will have to issue a memory barrier // soon. - if (gtc->needs_barrier(GL_TEXTURE_FETCH_BARRIER_BIT)) { + if (gtc->needs_barrier(GL_TEXTURE_FETCH_BARRIER_BIT, false)) { barriers |= GL_TEXTURE_FETCH_BARRIER_BIT; } #endif diff --git a/panda/src/glstuff/glTextureContext_src.I b/panda/src/glstuff/glTextureContext_src.I index bde6a4d977a..a0a986e80f9 100644 --- a/panda/src/glstuff/glTextureContext_src.I +++ b/panda/src/glstuff/glTextureContext_src.I @@ -59,3 +59,43 @@ get_view_buffer(int view) const { return 0; } } + +/** + * Returns true if an async upload is pending. + */ +INLINE bool CLP(TextureContext):: +is_upload_pending() const { + // We can't simply compare _uploads_started to _uploads_finished, since + // they also get set to the same by cancel_pending_uploads() + return _uploads_pending > 0; +} + +/** + * Waits for all uploads to be finished. + */ +INLINE void CLP(TextureContext):: +wait_pending_uploads() const { + if (is_upload_pending()) { + do_wait_pending_uploads(); + } +} + +/** + * Cancels all asynchronous uploads. 
Not guaranteed to be cancelled by the + * time this returns, consider following this up with a call to + * wait_pending_uploads(). + */ +INLINE void CLP(TextureContext):: +cancel_pending_uploads() { + _uploads_finished = _uploads_started; +} + +/** + * Waits for an unused PBO unless we're not at the given limit of PBOs yet. + */ +INLINE void CLP(TextureContext):: +wait_for_unused_pbo(int limit) const { + if (_unused_pbos.empty() && _num_pbos >= limit) { + do_wait_for_unused_pbo(limit); + } +} diff --git a/panda/src/glstuff/glTextureContext_src.cxx b/panda/src/glstuff/glTextureContext_src.cxx index 7572ef8a4f4..efc001df347 100644 --- a/panda/src/glstuff/glTextureContext_src.cxx +++ b/panda/src/glstuff/glTextureContext_src.cxx @@ -13,6 +13,8 @@ #include "pnotify.h" +static PStatCollector _wait_async_texture_uploads_pcollector("Wait:Async Texture Uploads"); + TypeHandle CLP(TextureContext)::_type_handle; /** @@ -48,6 +50,8 @@ evict_lru() { */ void CLP(TextureContext):: reset_data(GLenum target, int num_views) { + cancel_pending_uploads(); + // Free the texture resources. set_num_views(0); @@ -63,12 +67,13 @@ reset_data(GLenum target, int num_views) { #ifndef OPENGLES_1 // Mark the texture as coherent. - if (gl_enable_memory_barriers) { - _glgsg->_textures_needing_fetch_barrier.erase(this); - _glgsg->_textures_needing_image_access_barrier.erase(this); - _glgsg->_textures_needing_update_barrier.erase(this); - _glgsg->_textures_needing_framebuffer_barrier.erase(this); - } + _texture_fetch_barrier_counter = _glgsg->_texture_fetch_barrier_counter - 1; + _shader_image_read_barrier_counter = _glgsg->_shader_image_access_barrier_counter - 1; + _shader_image_write_barrier_counter = _glgsg->_shader_image_access_barrier_counter - 1; + _texture_read_barrier_counter = _glgsg->_texture_update_barrier_counter - 1; + _texture_write_barrier_counter = _glgsg->_shader_image_access_barrier_counter - 1; + _framebuffer_read_barrier_counter = _glgsg->_framebuffer_barrier_counter - 1; + _framebuffer_write_barrier_counter = _glgsg->_framebuffer_barrier_counter - 1; #endif } @@ -168,26 +173,50 @@ set_num_views(int num_views) { #ifndef OPENGLES_1 /** - * + * Returns true if the texture needs a barrier before a read or write of the + * given kind. If writing is false, only writes are synced, otherwise both + * reads and writes are synced. */ bool CLP(TextureContext):: -needs_barrier(GLbitfield barrier) { +needs_barrier(GLbitfield barrier, bool writing) { if (!gl_enable_memory_barriers) { return false; } - return (((barrier & GL_TEXTURE_FETCH_BARRIER_BIT) && - _glgsg->_textures_needing_fetch_barrier.count(this))) - || (((barrier & GL_SHADER_IMAGE_ACCESS_BARRIER_BIT) && - _glgsg->_textures_needing_image_access_barrier.count(this))) - || (((barrier & GL_TEXTURE_UPDATE_BARRIER_BIT) && - _glgsg->_textures_needing_update_barrier.count(this))) - || (((barrier & GL_FRAMEBUFFER_BARRIER_BIT) && - _glgsg->_textures_needing_framebuffer_barrier.count(this))); + if (barrier & GL_TEXTURE_FETCH_BARRIER_BIT) { + // This is always a read, so only sync RAW. + if (_glgsg->_texture_fetch_barrier_counter == _texture_fetch_barrier_counter) { + return true; + } + } + + if (barrier & GL_SHADER_IMAGE_ACCESS_BARRIER_BIT) { + // Sync WAR, WAW and RAW, but not RAR. 
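+    // (A new write must be ordered after earlier reads and writes, and a new
+    // read after earlier writes, but two consecutive reads never need a barrier.)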
+ if ((writing && _glgsg->_shader_image_access_barrier_counter == _shader_image_read_barrier_counter) || + (_glgsg->_shader_image_access_barrier_counter == _shader_image_write_barrier_counter)) { + return true; + } + } + + if (barrier & GL_TEXTURE_UPDATE_BARRIER_BIT) { + if ((writing && _glgsg->_texture_update_barrier_counter == _texture_read_barrier_counter) || + (_glgsg->_texture_update_barrier_counter == _texture_write_barrier_counter)) { + return true; + } + } + + if (barrier & GL_FRAMEBUFFER_BARRIER_BIT) { + if ((writing && _glgsg->_framebuffer_barrier_counter == _framebuffer_read_barrier_counter) || + (_glgsg->_framebuffer_barrier_counter == _framebuffer_write_barrier_counter)) { + return true; + } + } + + return false; } /** - * Mark a texture as needing a memory barrier, since a non-coherent read or + * Mark a texture as needing a memory barrier, since an unsynchronized read or * write just happened to it. If 'wrote' is true, it was written to. */ void CLP(TextureContext):: @@ -199,16 +228,73 @@ mark_incoherent(bool wrote) { // If we only read from it, the next read operation won't need another // barrier, since it'll be reading the same data. if (wrote) { - _glgsg->_textures_needing_fetch_barrier.insert(this); + _texture_fetch_barrier_counter = _glgsg->_texture_fetch_barrier_counter; + _shader_image_write_barrier_counter = _glgsg->_shader_image_access_barrier_counter; + _texture_write_barrier_counter = _glgsg->_shader_image_access_barrier_counter; + _framebuffer_write_barrier_counter = _glgsg->_framebuffer_barrier_counter; } // We could still write to it before we read from it, so we have to always - // insert these barriers. This could be slightly optimized so that we don't - // issue a barrier between consecutive image reads, but that may not be - // worth the trouble. - _glgsg->_textures_needing_image_access_barrier.insert(this); - _glgsg->_textures_needing_update_barrier.insert(this); - _glgsg->_textures_needing_framebuffer_barrier.insert(this); + // insert these barriers. + _shader_image_read_barrier_counter = _glgsg->_shader_image_access_barrier_counter; + _texture_read_barrier_counter = _glgsg->_texture_update_barrier_counter; + _framebuffer_read_barrier_counter = _glgsg->_framebuffer_barrier_counter; } #endif // !OPENGLES_1 + +/** + * Returns a PBO with the given size to the pool of unused PBOs. + */ +void CLP(TextureContext):: +return_pbo(GLuint pbo, size_t size) { + // Also triggers when the number of buffers is -1 (which effectively means + // to always delete the buffers after use). + if (_num_pbos > get_texture()->get_num_async_transfer_buffers() || + size < _pbo_size) { + // We have too many PBOs, or this PBO is no longer of the proper + // size, so delete it rather than returning it to the pool. + _num_pbos--; + _glgsg->_glDeleteBuffers(1, &pbo); + } else { + _unused_pbos.push_front(pbo); + } +} + +/** + * Deletes all unused PBOs. + */ +void CLP(TextureContext):: +delete_unused_pbos() { + if (!_unused_pbos.empty()) { + for (GLuint pbo : _unused_pbos) { + _glgsg->_glDeleteBuffers(1, &pbo); + } + _num_pbos -= (int)_unused_pbos.size(); + _unused_pbos.clear(); + } +} + +/** + * Waits for all uploads to be finished. 
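+ * Repeatedly services the GSG's pending job queue on the calling thread until
+ * no uploads remain outstanding.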
+ */ +void CLP(TextureContext):: +do_wait_pending_uploads() const { + PStatTimer timer(_wait_async_texture_uploads_pcollector); + do { + _glgsg->process_pending_jobs(true); + } + while (is_upload_pending()); +} + +/** + * + */ +void CLP(TextureContext):: +do_wait_for_unused_pbo(int limit) const { + PStatTimer timer(_wait_async_texture_uploads_pcollector); + do { + _glgsg->process_pending_jobs(true); + } + while (_unused_pbos.empty() && _num_pbos >= limit); +} diff --git a/panda/src/glstuff/glTextureContext_src.h b/panda/src/glstuff/glTextureContext_src.h index c4244884714..03626a46ce7 100644 --- a/panda/src/glstuff/glTextureContext_src.h +++ b/panda/src/glstuff/glTextureContext_src.h @@ -41,12 +41,24 @@ class EXPCL_GL CLP(TextureContext) : public TextureContext { INLINE GLuint get_view_buffer(int view) const; #ifdef OPENGLES_1 - static constexpr bool needs_barrier(GLbitfield barrier) { return false; }; + static constexpr bool needs_barrier(GLbitfield barrier, bool writing) { return false; }; #else - bool needs_barrier(GLbitfield barrier); + bool needs_barrier(GLbitfield barrier, bool writing); void mark_incoherent(bool wrote); #endif + INLINE bool is_upload_pending() const; + INLINE void wait_pending_uploads() const; + INLINE void cancel_pending_uploads(); + + void return_pbo(GLuint pbo, size_t size); + void delete_unused_pbos(); + INLINE void wait_for_unused_pbo(int limit) const; + +private: + void do_wait_pending_uploads() const; + void do_wait_for_unused_pbo(int limit) const; + private: // This is the GL "name" of the texture object. GLuint _index; @@ -76,8 +88,25 @@ class EXPCL_GL CLP(TextureContext) : public TextureContext { GLenum _target; SamplerState _active_sampler; + // These counters are used to prevent out-of-order updates. + int _uploads_started = 0; + int _uploads_finished = 0; + int _uploads_pending = 0; + pdeque _unused_pbos; + int _num_pbos = 0; + size_t _pbo_size = 0; + CLP(GraphicsStateGuardian) *_glgsg; + // These are set to the equivalent counter in glgsg when a write is performed. + int _texture_fetch_barrier_counter = -1; + int _shader_image_read_barrier_counter = -1; + int _shader_image_write_barrier_counter = -1; + int _texture_read_barrier_counter = -1; + int _texture_write_barrier_counter = -1; + int _framebuffer_read_barrier_counter = -1; + int _framebuffer_write_barrier_counter = -1; + public: static TypeHandle get_class_type() { return _type_handle; diff --git a/panda/src/glstuff/glmisc_src.cxx b/panda/src/glstuff/glmisc_src.cxx index f223ef266e7..d1ae931c7ce 100644 --- a/panda/src/glstuff/glmisc_src.cxx +++ b/panda/src/glstuff/glmisc_src.cxx @@ -22,6 +22,11 @@ ConfigVariableBool gl_forward_compatible PRC_DESC("Setting this to true will request a forward-compatible OpenGL " "context, which will not support the fixed-function pipeline.")); +ConfigVariableBool gl_support_dsa + ("gl-support-dsa", true, + PRC_DESC("Configure this false if you suspect your GL's implementation of " + "Direct State Access is broken.")); + ConfigVariableBool gl_support_fbo ("gl-support-fbo", true, PRC_DESC("Configure this false if your GL's implementation of " @@ -321,6 +326,19 @@ ConfigVariableBool gl_depth_zero_to_one "range from 0 to 1, matching other graphics APIs. 
This setting " "requires OpenGL 4.5, or NVIDIA GeForce 8+ hardware.")); +ConfigVariableInt gl_texture_transfer_num_threads + ("gl-texture-transfer-num-threads", 2, + PRC_DESC("The number of threads that will be started to upload and download " + "texture data asynchronously, either via the setup_async_transfer " + "interface on the the Texture class or via the async screenshot " + "interface.")); + +ConfigVariableEnum gl_texture_transfer_thread_priority + ("gl-texture-transfer-thread-priority", TP_normal, + PRC_DESC("The default thread priority to assign to the threads created for " + "asynchronous texture transfers. The default is 'normal'; you may " + "also specify 'low', 'high', or 'urgent'.")); + extern ConfigVariableBool gl_parallel_arrays; void CLP(init_classes)() { diff --git a/panda/src/glstuff/glmisc_src.h b/panda/src/glstuff/glmisc_src.h index 1cc96726fe5..deb219dc5a9 100644 --- a/panda/src/glstuff/glmisc_src.h +++ b/panda/src/glstuff/glmisc_src.h @@ -17,6 +17,7 @@ #include "configVariableEnum.h" #include "geomEnums.h" #include "coordinateSystem.h" +#include "threadPriority.h" // Define some macros to transparently map to the double or float versions of // the OpenGL function names. @@ -35,6 +36,7 @@ extern EXPCL_GL ConfigVariableInt gl_version; extern EXPCL_GL ConfigVariableBool gl_forward_compatible; extern EXPCL_GL ConfigVariableBool gl_support_fbo; +extern ConfigVariableBool gl_support_dsa; extern ConfigVariableBool gl_cheap_textures; extern ConfigVariableBool gl_ignore_clamp; extern ConfigVariableBool gl_support_clamp_to_border; @@ -75,6 +77,8 @@ extern ConfigVariableBool gl_support_shadow_filter; extern ConfigVariableBool gl_support_vertex_array_bgra; extern ConfigVariableBool gl_force_image_bindings_writeonly; extern ConfigVariableEnum gl_coordinate_system; +extern ConfigVariableInt gl_texture_transfer_num_threads; +extern ConfigVariableEnum gl_texture_transfer_thread_priority; extern EXPCL_GL void CLP(init_classes)(); diff --git a/panda/src/gobj/preparedGraphicsObjects.cxx b/panda/src/gobj/preparedGraphicsObjects.cxx index b76b0524765..3089537dd2c 100644 --- a/panda/src/gobj/preparedGraphicsObjects.cxx +++ b/panda/src/gobj/preparedGraphicsObjects.cxx @@ -1515,9 +1515,24 @@ begin_frame(GraphicsStateGuardianBase *gsg, Thread *current_thread) { Texture *tex = qti->first; TextureContext *tc = tex->prepare_now(this, gsg); if (tc != nullptr) { - gsg->update_texture(tc, true); - if (qti->second != nullptr) { - qti->second->set_result(tc); + if (tex->get_num_async_transfer_buffers() == 0) { + gsg->update_texture(tc, true); + if (qti->second != nullptr) { + qti->second->set_result(tc); + } + } else { + // Async update + CompletionToken token; + if (qti->second != nullptr) { + token = [tc, fut = std::move(qti->second)] (bool success) { + if (success) { + fut->set_result(tc); + } else { + fut->notify_removed(); + } + }; + } + gsg->update_texture(tc, false, std::move(token)); } } } diff --git a/panda/src/gobj/texture.I b/panda/src/gobj/texture.I index 0d349f6653b..d304bfbfed4 100644 --- a/panda/src/gobj/texture.I +++ b/panda/src/gobj/texture.I @@ -2139,6 +2139,14 @@ rescale_texture() { return do_rescale_texture(cdata); } +/** + * Returns the number previously passed to setup_async_transfer(). + */ +INLINE int Texture:: +get_num_async_transfer_buffers() const { + return _num_async_transfer_buffers.load(std::memory_order_relaxed); +} + /** * Works like adjust_size, but also considers the texture class. 
Movie * textures, for instance, always pad outwards, regardless of textures- diff --git a/panda/src/gobj/texture.cxx b/panda/src/gobj/texture.cxx index 2afe0f03205..b8b1f5bbf15 100644 --- a/panda/src/gobj/texture.cxx +++ b/panda/src/gobj/texture.cxx @@ -1570,6 +1570,27 @@ get_view_modified_pages(UpdateSeq since, int view, int n) const { return result; } +/** + * Sets the number of buffers for asynchronous upload of texture data. If this + * number is higher than 0, future texture uploads will occur in the background, + * up to the provided amount at a time. The asynchronous upload will be + * triggered by calls to prepare() or when the texture comes into view and + * allow-incomplete-render is true. + * + * Each buffer is only large enough to contain a single view, so you may wish + * to create twice as many buffers if you want to update twice as many views. + * + * You can also pass the special value -1, which means to create as many + * buffers as is necessary for all asynchronous uploads to take place, and they + * will be deleted afterwards automatically. + * + * This setting will take effect immediately. + */ +void Texture:: +setup_async_transfer(int num_buffers) { + _num_async_transfer_buffers.store(num_buffers); +} + /** * Indicates that the texture should be enqueued to be prepared in the * indicated prepared_objects at the beginning of the next frame. This will @@ -5704,7 +5725,14 @@ do_modify_ram_image(CData *cdata) { } else { do_clear_ram_mipmap_images(cdata); } - return cdata->_ram_images[0]._image; + PTA_uchar data = cdata->_ram_images[0]._image; + if (data.get_node_ref_count() > 0) { + // Copy on write, if an upload thread is reading this now. + PTA_uchar new_data = PTA_uchar::empty_array(0); + new_data.v() = data.v(); + data.swap(new_data); + } + return data; } /** @@ -5779,7 +5807,15 @@ do_modify_ram_mipmap_image(CData *cdata, int n) { cdata->_ram_images[n]._image.empty()) { do_make_ram_mipmap_image(cdata, n); } - return cdata->_ram_images[n]._image; + + PTA_uchar data = cdata->_ram_images[n]._image; + if (data.get_node_ref_count() > 0) { + // Copy on write, if an upload thread is reading this now. 
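+    // (The asynchronous upload path node_ref()s the image while copying it, so a
+    // nonzero node reference count means a transfer thread may still be reading
+    // the old data; hand the caller a fresh copy instead.)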
+ PTA_uchar new_data = PTA_uchar::empty_array(0); + new_data.v() = data.v(); + data.swap(new_data); + } + return data; } /** diff --git a/panda/src/gobj/texture.h b/panda/src/gobj/texture.h index f6ec5eb1f2a..4ba2b58169b 100644 --- a/panda/src/gobj/texture.h +++ b/panda/src/gobj/texture.h @@ -47,6 +47,7 @@ #include "pfmFile.h" #include "asyncTask.h" #include "extension.h" +#include "patomic.h" class TextureContext; class FactoryParams; @@ -536,6 +537,8 @@ class EXPCL_PANDA_GOBJ Texture : public TypedWritableReferenceCount, public Nama MAKE_PROPERTY(auto_texture_scale, get_auto_texture_scale, set_auto_texture_scale); + void setup_async_transfer(int num_buffers); + PT(AsyncFuture) prepare(PreparedGraphicsObjects *prepared_objects); bool is_prepared(PreparedGraphicsObjects *prepared_objects) const; bool was_image_modified(PreparedGraphicsObjects *prepared_objects) const; @@ -628,6 +631,7 @@ class EXPCL_PANDA_GOBJ Texture : public TypedWritableReferenceCount, public Nama public: void texture_uploaded(); + INLINE int get_num_async_transfer_buffers() const; virtual bool has_cull_callback() const; virtual bool cull_callback(CullTraverser *trav, const CullTraverserData &data) const; @@ -1072,6 +1076,8 @@ class EXPCL_PANDA_GOBJ Texture : public TypedWritableReferenceCount, public Nama typedef pmap Contexts; Contexts _contexts; + patomic_signed_lock_free _num_async_transfer_buffers { 0 }; + // It is common, when using normal maps, specular maps, gloss maps, and // such, to use a file naming convention where the filenames of the special // maps are derived by concatenating a suffix to the name of the diffuse diff --git a/panda/src/gsgbase/graphicsStateGuardianBase.h b/panda/src/gsgbase/graphicsStateGuardianBase.h index 80d31f5c3c4..d49f144046f 100644 --- a/panda/src/gsgbase/graphicsStateGuardianBase.h +++ b/panda/src/gsgbase/graphicsStateGuardianBase.h @@ -22,6 +22,7 @@ #include "lightMutex.h" #include "patomic.h" #include "small_vector.h" +#include "completionToken.h" // A handful of forward references. @@ -149,6 +150,7 @@ class EXPCL_PANDA_GSGBASE GraphicsStateGuardianBase : public TypedWritableRefere virtual TextureContext *prepare_texture(Texture *tex)=0; virtual bool update_texture(TextureContext *tc, bool force)=0; + virtual bool update_texture(TextureContext *tc, bool force, CompletionToken token)=0; virtual void release_texture(TextureContext *tc)=0; virtual void release_textures(const pvector &contexts)=0; virtual bool extract_texture_data(Texture *tex)=0; diff --git a/panda/src/putil/CMakeLists.txt b/panda/src/putil/CMakeLists.txt index ecc14b9843c..c99dfac3837 100644 --- a/panda/src/putil/CMakeLists.txt +++ b/panda/src/putil/CMakeLists.txt @@ -20,6 +20,9 @@ set(P3PUTIL_HEADERS clockObject.h clockObject.I collideMask.h colorSpace.h + completable.I completable.h + completionCounter.I completionCounter.h + completionToken.I completionToken.h copyOnWriteObject.h copyOnWriteObject.I copyOnWritePointer.h copyOnWritePointer.I compareTo.I compareTo.h @@ -86,6 +89,7 @@ set(P3PUTIL_SOURCES callbackObject.cxx clockObject.cxx colorSpace.cxx + completionCounter.cxx copyOnWriteObject.cxx copyOnWritePointer.cxx config_putil.cxx configurable.cxx diff --git a/panda/src/putil/completable.I b/panda/src/putil/completable.I new file mode 100644 index 00000000000..96d140be33c --- /dev/null +++ b/panda/src/putil/completable.I @@ -0,0 +1,75 @@ +/** + * PANDA 3D SOFTWARE + * Copyright (c) Carnegie Mellon University. All rights reserved. 
+ * + * All use of this software is subject to the terms of the revised BSD + * license. You should have received a copy of this license along + * with this source code in a file named "LICENSE." + * + * @file completable.I + * @author rdb + * @date 2025-01-22 + */ + +#ifndef CPPPARSER +/** + * + */ +template<class Callable> +INLINE Completable:: +Completable(Callable callback) : + _data(new LambdaData<Callable>(std::move(callback), [](Data *data, bool do_run) { + LambdaData<Callable> *self = (LambdaData<Callable> *)data; + if (do_run) { + std::move(self->_lambda)(); + } + delete self; + })) { +} +#endif + +/** + * + */ +INLINE Completable:: +Completable(Completable &&from) noexcept : + _data(from._data) { + from._data = nullptr; +} + +/** + * + */ +INLINE Completable &Completable:: +operator =(Completable &&from) { + Data *data = _data; + _data = from._data; + from._data = nullptr; + if (data != nullptr) { + data->_function.load(std::memory_order_relaxed)(data, false); + } + return *this; +} + +/** + * + */ +INLINE Completable:: +~Completable() { + Data *data = _data; + if (data != nullptr) { + data->_function.load(std::memory_order_relaxed)(data, false); + } +} + +/** + * + */ +INLINE void Completable:: +operator ()() { + Data *data = _data; + _data = nullptr; + if (data != nullptr) { + data->_function.load(std::memory_order_relaxed)(data, true); + } +} diff --git a/panda/src/putil/completable.h b/panda/src/putil/completable.h new file mode 100644 index 00000000000..9b0f6fdd129 --- /dev/null +++ b/panda/src/putil/completable.h @@ -0,0 +1,82 @@ +/** + * PANDA 3D SOFTWARE + * Copyright (c) Carnegie Mellon University. All rights reserved. + * + * All use of this software is subject to the terms of the revised BSD + * license. You should have received a copy of this license along + * with this source code in a file named "LICENSE." + * + * @file completable.h + * @author rdb + * @date 2025-01-22 + */ + +#ifndef COMPLETABLE_H +#define COMPLETABLE_H + +#include "pandabase.h" +#include "patomic.h" + +/** + * Stores a type-erased callable that is move-only. May only be called once. + */ +class EXPCL_PANDA_PUTIL Completable { +public: + constexpr Completable() = default; + +#ifndef CPPPARSER + template<class Callable> + INLINE Completable(Callable callback); +#endif + + INLINE Completable(const Completable &copy) = delete; + INLINE Completable(Completable &&from) noexcept; + + INLINE Completable &operator =(const Completable &copy) = delete; + INLINE Completable &operator =(Completable &&from); + + INLINE void operator ()(); + + INLINE ~Completable(); + +protected: + // There are several design approaches here: + // 1. Optimize for no data block: do not require dynamic allocation of a data + // block in the simple case where the callback data is only the size of a + // single pointer. Store two pointers, one function pointer and a data + // pointer(-sized storage), directly on the class here. + // 2. Optimize for a data block: store the function pointer on the data block, + // always requiring dynamic allocation. + // + // Right now I have opted for 2 because it allows the function pointer to be + // dynamically swapped (used in CompletionCounter), but this decision may + // change in the future. + + struct Data; + typedef void CallbackFunction(Data *, bool); + + struct Data { + patomic<CallbackFunction *> _function { nullptr }; + }; + + template<class Lambda> + struct LambdaData : public Data { + // Must unfortunately be defined inline, since this struct is protected.
+ LambdaData(Lambda lambda, CallbackFunction *function) : + _lambda(std::move(lambda)) { + _function = function; + } + + Lambda _lambda; + }; + + Data *_data = nullptr; + + friend class AsyncFuture; + friend class CompletionCounter; + friend class CompletionToken; +}; + +#include "completable.I" + +#endif diff --git a/panda/src/putil/completionCounter.I b/panda/src/putil/completionCounter.I new file mode 100644 index 00000000000..6d591433607 --- /dev/null +++ b/panda/src/putil/completionCounter.I @@ -0,0 +1,97 @@ +/** + * PANDA 3D SOFTWARE + * Copyright (c) Carnegie Mellon University. All rights reserved. + * + * All use of this software is subject to the terms of the revised BSD + * license. You should have received a copy of this license along + * with this source code in a file named "LICENSE." + * + * @file completionCounter.I + * @author rdb + * @date 2025-01-22 + */ + +/** + * + */ +INLINE CompletionCounter:: +~CompletionCounter() { + CounterData *data = _data; + if (data != nullptr) { + // then() is not called; we still need something that destructs the data + // when done. + auto prev_function = data->_function.exchange(&abandon_callback, std::memory_order_relaxed); + if (prev_function == nullptr) { + // Was already done. + delete data; + } + } +} + +/** + * Returns a new token. May not be called after then(). + */ +INLINE CompletionToken CompletionCounter:: +make_token() { + CompletionToken token; + if (_data == nullptr) { + _data = new CounterData; + _data->_function = &initial_callback; + } + auto old_value = _data->_counter.fetch_add(1); + nassertr(old_value >= 0, token); + token._callback._data = _data; + return token; +} + +/** + * Registers the given callback to run upon completion. If the counter is + * already done, runs it immediately. This requires an rvalue because it + * consumes the counter; use std::move() if you don't have an rvalue. + * + * The callback will either be called immediately or directly when the last + * token calls complete(); however, it may also be called if a token is + * destroyed. This may happen at unexpected times, such as when the lambda + * holding the token is destroyed prematurely. In this case, however, the + * passed success argument will always be false. + */ +template<class Callable> +INLINE void CompletionCounter:: +then(Callable callable) && { + // Replace the callback pointer with something that calls the given callable + // once the count reaches 0. + CounterData *data = _data; + nassertv(data != nullptr); + _data = nullptr; + if (data->_function.load(std::memory_order_acquire) == nullptr) { + // Already done. + callable((data->_counter.load(std::memory_order_relaxed) & ~0xffff) == 0); + delete data; + return; + } + + static_assert(sizeof(Callable) <= sizeof(data->_storage), + "raise storage size in completionCounter.h or reduce lambda captures"); + + new (data->_storage) Callable(std::move(callable)); + + Completable::CallbackFunction *new_function = + [] (Completable::Data *data_ptr, bool success) { + CounterData *data = (CounterData *)data_ptr; + auto prev_count = data->_counter.fetch_add((success ? 0 : 0x10000) - 1, std::memory_order_release); + if ((short)(prev_count & 0xffff) > 1) { + return; + } + + Callable *callable = (Callable *)data->_storage; + std::move(*callable)(success && (prev_count & ~0xffff) == 0); + callable->~Callable(); + delete data; + }; + + auto prev_function = data->_function.exchange(new_function, std::memory_order_acq_rel); + if (UNLIKELY(prev_function == nullptr)) { + // Last token finished in the meantime.
+ new_function(data, (data->_counter.load(std::memory_order_relaxed) & ~0xffff) == 0); + } +} diff --git a/panda/src/putil/completionCounter.cxx b/panda/src/putil/completionCounter.cxx new file mode 100644 index 00000000000..2540867e61f --- /dev/null +++ b/panda/src/putil/completionCounter.cxx @@ -0,0 +1,52 @@ +/** + * PANDA 3D SOFTWARE + * Copyright (c) Carnegie Mellon University. All rights reserved. + * + * All use of this software is subject to the terms of the revised BSD + * license. You should have received a copy of this license along + * with this source code in a file named "LICENSE." + * + * @file completionCounter.cxx + * @author rdb + * @date 2025-01-24 + */ + +#include "completionCounter.h" + +/** + * Called when a token is completed before then() is called. + */ +void CompletionCounter:: +initial_callback(Completable::Data *data_ptr, bool success) { + CounterData &data = *(CounterData *)data_ptr; + auto prev_count = data._counter.fetch_add((success ? 0 : 0x10000) - 1, std::memory_order_release); + if ((prev_count & 0xffff) == 1) { + // We're done early. + auto prev_callback = data._function.exchange(nullptr, std::memory_order_acq_rel); + nassertv(prev_callback != nullptr); + + // Someone called then() in the meantime. Call the new callback. The + // counter will drop below 0 when that's called, but the new callback is + // designed to handle that. + if (prev_callback != &initial_callback) { + prev_callback(data_ptr, success && (prev_count & ~0xffff) == 0); + } + } +} + +/** + * Called when a token is completed after this object is destroyed without + * then() being called. + */ +void CompletionCounter:: +abandon_callback(Completable::Data *data_ptr, bool success) { + CounterData &data = *(CounterData *)data_ptr; + auto prev_count = data._counter.fetch_sub(1, std::memory_order_relaxed); + if ((prev_count & 0xffff) <= 1) { + // Done. + auto prev_callback = data._function.exchange(nullptr, std::memory_order_relaxed); + nassertv(prev_callback != nullptr); + nassertv(prev_callback == &abandon_callback); + delete &data; + } +} diff --git a/panda/src/putil/completionCounter.h b/panda/src/putil/completionCounter.h new file mode 100644 index 00000000000..dbb0e2dcfb4 --- /dev/null +++ b/panda/src/putil/completionCounter.h @@ -0,0 +1,58 @@ +/** + * PANDA 3D SOFTWARE + * Copyright (c) Carnegie Mellon University. All rights reserved. + * + * All use of this software is subject to the terms of the revised BSD + * license. You should have received a copy of this license along + * with this source code in a file named "LICENSE." + * + * @file completionCounter.h + * @author rdb + * @date 2025-01-22 + */ + +#ifndef COMPLETIONCOUNTER_H +#define COMPLETIONCOUNTER_H + +#include "pandabase.h" +#include "completionToken.h" + +#include + +/** + * Shared counter that generates "completion tokens", each of which increments + * the counter and decrements it again once it is finished. After the tokens + * are handed out, a callback may be registered using then(), which will be + * called as soon as the last token is done.
+ */ +class EXPCL_PANDA_PUTIL CompletionCounter { +public: + constexpr CompletionCounter() = default; + CompletionCounter(const CompletionCounter &copy) = delete; + + INLINE ~CompletionCounter(); + + INLINE CompletionToken make_token(); + + template<class Callable> + INLINE void then(Callable callable) &&; + +private: + static void initial_callback(Completable::Data *data, bool success); + static void abandon_callback(Completable::Data *data, bool success); + +protected: + struct CounterData : public Completable::Data { + // Least significant half is counter, most significant half is error count + patomic_signed_lock_free _counter { 0 }; + + // Just raise this if the static_assert fires (or limit the size of your + // lambda captures). + alignas(std::max_align_t) unsigned char _storage[64]; + }; + CounterData *_data = nullptr; +}; + +#include "completionCounter.I" + +#endif diff --git a/panda/src/putil/completionToken.I b/panda/src/putil/completionToken.I new file mode 100644 index 00000000000..aef06a7d4eb --- /dev/null +++ b/panda/src/putil/completionToken.I @@ -0,0 +1,42 @@ +/** + * PANDA 3D SOFTWARE + * Copyright (c) Carnegie Mellon University. All rights reserved. + * + * All use of this software is subject to the terms of the revised BSD + * license. You should have received a copy of this license along + * with this source code in a file named "LICENSE." + * + * @file completionToken.I + * @author rdb + * @date 2025-01-22 + */ + +#ifndef CPPPARSER +/** + * Creates a token that calls the given callback when it's done, passing it + * true on success and false on failure or abandonment. + */ +template<class Callable> +INLINE CompletionToken:: +CompletionToken(Callable callback) { + // The main difference from a Completable is that this will always call the + // callback, even on failure, so that cleanup can be done. + _callback._data = new Completable::LambdaData<Callable>(std::move(callback), [](Completable::Data *data, bool success) { + Completable::LambdaData<Callable> *self = (Completable::LambdaData<Callable> *)data; + std::move(self->_lambda)(success); + delete self; + }); +} +#endif + +/** + * + */ +INLINE void CompletionToken:: +complete(bool success) { + Completable::Data *data = _callback._data; + if (data != nullptr) { + _callback._data = nullptr; + data->_function.load(std::memory_order_relaxed)(data, success); + } +} diff --git a/panda/src/putil/completionToken.h b/panda/src/putil/completionToken.h new file mode 100644 index 00000000000..b73f4fe376b --- /dev/null +++ b/panda/src/putil/completionToken.h @@ -0,0 +1,56 @@ +/** + * PANDA 3D SOFTWARE + * Copyright (c) Carnegie Mellon University. All rights reserved. + * + * All use of this software is subject to the terms of the revised BSD + * license. You should have received a copy of this license along + * with this source code in a file named "LICENSE." + * + * @file completionToken.h + * @author rdb + * @date 2025-01-22 + */ + +#ifndef COMPLETIONTOKEN_H +#define COMPLETIONTOKEN_H + +#include "pandabase.h" +#include "pnotify.h" +#include "completable.h" + +/** + * A completion token can be created from a callback, future or + * CompletionCounter and can be passed into an asynchronous operation in order + * to receive a signal when it is done. + * + * The asynchronous operation should call complete() on it when it is done, + * with a boolean value indicating success or failure. If the token is + * destroyed prematurely, it is treated as if it called complete(false).
+ * + * This should be preferred over passing an AsyncFuture into a method since + * a CompletionToken provides both more flexibility in use (due to accepting + * an arbitrary callback) and more safety (since the RAII semantics guarantees + * that the callback is never silently dropped). + * + * The token may only be moved, not copied. + */ +class EXPCL_PANDA_PUTIL CompletionToken { +public: + constexpr CompletionToken() = default; + +#ifndef CPPPARSER + template<class Callable> + INLINE CompletionToken(Callable callback); +#endif + + void complete(bool success); + +protected: + Completable _callback; + + friend class CompletionCounter; +}; + +#include "completionToken.I" + +#endif diff --git a/panda/src/putil/p3putil_composite1.cxx b/panda/src/putil/p3putil_composite1.cxx index f78459eff71..c464d7708c9 100644 --- a/panda/src/putil/p3putil_composite1.cxx +++ b/panda/src/putil/p3putil_composite1.cxx @@ -17,6 +17,7 @@ #include "callbackObject.cxx" #include "clockObject.cxx" #include "colorSpace.cxx" +#include "completionCounter.cxx" #include "config_putil.cxx" #include "configurable.cxx" #include "copyOnWriteObject.cxx"
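
For illustration only, not part of the patch: a minimal sketch of how an application might opt a texture into the asynchronous upload path added in texture.cxx above. The helper name, the texture path and the buffer count of 2 are placeholder assumptions; only setup_async_transfer() and prepare() come from this change.

// Hypothetical usage sketch; assumes a window and its GSG have already been
// opened through the usual framework setup.
#include "texturePool.h"
#include "graphicsStateGuardian.h"

void enable_async_upload(GraphicsStateGuardian *gsg) {
  Texture *tex = TexturePool::load_texture("maps/streamed_texture.png");
  if (tex == nullptr) {
    return;
  }
  // Allow up to two transfer buffers to be in flight for this texture.
  // Passing -1 instead would allocate as many temporary buffers as needed.
  tex->setup_async_transfer(2);
  // With async transfer enabled, prepare() schedules the upload in the
  // background instead of stalling the draw thread on first use.
  tex->prepare(gsg->get_prepared_objects());
}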
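
Likewise, a hedged sketch of the one-shot semantics of the new Completable class: the stored lambda runs at most once, ownership transfers on move, and an instance that is never invoked simply releases its data on destruction. The variable names are made up for the example.

#include "completable.h"
#include <iostream>
#include <utility>

int main() {
  Completable done([] { std::cout << "finished\n"; });
  Completable moved = std::move(done);  // ownership transfers; 'done' is now empty
  moved();  // runs the stored lambda and releases the data block
  moved();  // safe no-op: the callback has already run
  return 0;
}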
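
A sketch of how a caller might hand a CompletionToken to an asynchronous operation and get notified either way. The do_async_work() helper is hypothetical; an actual consumer in this patch is the new update_texture() overload on GraphicsStateGuardianBase.

#include "completionToken.h"
#include <iostream>
#include <utility>

// Stand-in for any API that accepts a CompletionToken; a real implementation
// would queue work and call complete() when it finishes.
static void do_async_work(CompletionToken token) {
  token.complete(true);
}

int main() {
  CompletionToken token([](bool success) {
    std::cout << (success ? "operation succeeded" : "operation failed or was abandoned") << "\n";
  });
  do_async_work(std::move(token));
  return 0;
}

If do_async_work() dropped the token without calling complete(), the callback would still fire with success set to false, which is the safety property the header comment describes.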
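
Finally, a sketch of combining several tokens through a CompletionCounter; start_upload() is again a hypothetical stand-in for any operation that accepts a token.

#include "completionCounter.h"
#include <iostream>
#include <utility>

static void start_upload(CompletionToken token) {
  token.complete(true);  // pretend the upload finished successfully
}

int main() {
  CompletionCounter counter;
  start_upload(counter.make_token());
  start_upload(counter.make_token());

  // then() consumes the counter, hence the std::move(); the callback fires
  // when the last outstanding token completes (or immediately, as here).
  std::move(counter).then([](bool all_ok) {
    std::cout << "all uploads done, success=" << all_ok << "\n";
  });
  return 0;
}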