/* -*-c++-*- */
/* osgEarth - Geospatial SDK for OpenSceneGraph
* Copyright 2020 Pelican Mapping
* http://osgearth.org
*
* osgEarth is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.  If not, see <http://www.gnu.org/licenses/>
*/

#ifndef OSGEARTH_GLUTILS_H
#define OSGEARTH_GLUTILS_H 1

#include <osgEarth/Common>
#include <osgEarth/optional>
#include <osgEarth/Threading>
#include <osgEarth/Math>
#include <osg/StateSet>
#include <osg/OperationThread>
#include <osg/GraphicsContext>
#include <osg/GLObjects>
#include <osg/Drawable>
#include <osg/Texture2D>
#include <set>

// Fallback for GLintptr when no preprocessor macro of that name is defined.
// NOTE(review): GL headers normally declare GLintptr as a typedef, which
// #ifndef cannot detect -- confirm this macro override is intended.
#ifndef GLintptr
#define GLintptr std::intptr_t
#endif
// Fallback for GLsizeiptr, using the same mechanism as above.
// NOTE(review): the OpenGL headers declare GLsizeiptr as a signed type while
// std::uintptr_t is unsigned -- confirm.
#ifndef GLsizeiptr
#define GLsizeiptr std::uintptr_t
#endif

// Storage flag passed to glBufferStorage (see ArraySSBO::apply); defined here
// when the GL headers in use do not provide it.
#ifndef GL_DYNAMIC_STORAGE_BIT
#define GL_DYNAMIC_STORAGE_BIT 0x0100
#endif

// Forward declaration of the type that appears in the GLObjectsCompiler
// signatures below, so this header does not need the full definition.
namespace osgUtil {
    class StateToCompile;
}

namespace osgEarth
{
    // Makes osgEarth::Threading names (e.g. Cancelable) usable unqualified in
    // the declarations below. Being a using-directive in a header, it also
    // applies to every file that includes this header.
    using namespace Threading;

    //! Static helpers for configuring GL-related state on an osg::StateSet.
    //! Each setter takes the StateSet to modify and an OSG override value.
    struct OSGEARTH_EXPORT GLUtils
    {
        //! Sets any default uniforms required by the implementation
        static void setGlobalDefaults(osg::StateSet* stateSet);

        //! Configure lighting (GL_LIGHTING)
        static void setLighting(osg::StateSet* stateSet, osg::StateAttribute::OverrideValue ov);
        
        //! Configure line width (GL_LINE_WIDTH)
        static void setLineWidth(osg::StateSet* stateSet, float value, osg::StateAttribute::OverrideValue ov);

        //! Configure line stippling (GL_LINE_STIPPLE)
        static void setLineStipple(osg::StateSet* stateSet, int factor, unsigned short pattern, osg::StateAttribute::OverrideValue ov);

        //! Configure line antialiasing (GL_LINE_SMOOTH)
        static void setLineSmooth(osg::StateSet* stateSet, osg::StateAttribute::OverrideValue ov);

        //! Configure point rendering size (GL_POINT_SIZE)
        static void setPointSize(osg::StateSet* stateSet, float value, osg::StateAttribute::OverrideValue ov);

        //! Configure point rounding/antialiasing (GL_POINT_SMOOTH)
        static void setPointSmooth(osg::StateSet* stateSet, osg::StateAttribute::OverrideValue ov);

        //! Removes the state associated with a GL capability, causing it to
        //! be inherited from above. The capability should be one of:
        //! GL_LIGHTING, GL_LINE_WIDTH, GL_LINE_STIPPLE, GL_LINE_SMOOTH, GL_POINT_SIZE
        static void remove(osg::StateSet* stateSet, GLenum cap);

        //! Computes the driver's reported alignment for SSBO allocation
        static GLsizei getSSBOAlignment(osg::State&);
    };

    //! An osg::Operation that carries an optional vertical-sync setting.
    //! NOTE(review): the name suggests it is installed as a viewer realize
    //! operation; the implementation is not in this header -- confirm.
    struct OSGEARTH_EXPORT CustomRealizeOperation : public osg::Operation
    {
        //! Entry point invoked with the object the operation is run against
        virtual void operator()(osg::Object*);

        //! Requests vsync on or off (presumably recorded in _vsync)
        void setSyncToVBlank(bool);

        //! Optional vsync flag; optional<> lets "never set" be represented
        optional<bool> _vsync;
    };

    //! CustomRealizeOperation variant that overrides operator().
    //! NOTE(review): presumably performs GL3-specific setup; the
    //! implementation is not visible in this header.
    struct OSGEARTH_EXPORT GL3RealizeOperation : public CustomRealizeOperation
    {
        virtual void operator()(osg::Object*);
    };
    
    //! Base class for GL object containers
    class OSGEARTH_EXPORT GLObject
    {
    public:
        using Ptr = std::shared_ptr<GLObject>;
        virtual void release() = 0;
        osg::GLExtensions* ext() const { return _ext; }
    protected:
        GLObject(osg::State& state, const std::string& label);
        std::string _label;
        osg::GLExtensions* _ext;
    };

    //! A buffer object
    class OSGEARTH_EXPORT GLBuffer : public GLObject
    {
    public:
        using Ptr = std::shared_ptr<GLBuffer>;

        //! Factory; buffers are always handed out as shared pointers.
        //! @param target GL buffer target (e.g. GL_SHADER_STORAGE_BUFFER)
        //! @param state OSG state to create the buffer against
        //! @param label Optional text label
        static Ptr create(GLenum target, osg::State& state, const std::string& label = "");

        //! Binds the buffer to the target it was created with
        void bind();

        //! Binds the buffer to an explicit target
        void bind(GLenum target);

        //! GL name of the buffer
        GLuint name() const { return _name; }

        //! Target passed at creation time
        GLenum target() const { return _target; }

        //! Releases the buffer (GLObject interface)
        void release() override;

    protected:
        GLBuffer(GLenum target, osg::State& state, const std::string& label);

    private:
        GLuint _name;
        GLenum _target;
    };

    //! A texture object with optional resident handle
    class OSGEARTH_EXPORT GLTexture : public GLObject
    {
    public:
        using Ptr = std::shared_ptr<GLTexture>;

        //! Factory; textures are always handed out as shared pointers.
        //! @param target GL texture target
        //! @param state OSG state to create the texture against
        //! @param label Optional text label
        static Ptr create(GLenum target, osg::State& state, const std::string& label = "");

        //! Binds the texture
        void bind();

        //! 64-bit handle of the texture
        GLuint64 handle();

        //! GL name of the texture
        GLuint name() const { return _name; }

        //! Toggles residency of the texture handle
        void makeResident(bool toggle);

        //! Releases the texture (GLObject interface)
        void release() override;

        //! Mutable access to a caller-defined identifier string
        std::string& id() { return _id; }

    protected:
        GLTexture(GLenum target, osg::State& state, const std::string& label);

    private:
        GLenum _target;
        GLuint _name;
        GLuint64 _handle;
        bool _isResident;
        std::string _id;
    };

// Select the GraphicsObjectManager-based releaser on OSG 3.6.0 and newer.
#if OSG_VERSION_GREATER_OR_EQUAL(3,6,0)
#define OE_USE_GRAPHICS_OBJECT_MANAGER
#endif

#ifdef OE_USE_GRAPHICS_OBJECT_MANAGER
    /**
     * Mechanism that will automatically delete a GL object
     * when its container goes out of scope.
     * This variant plugs into OSG through osg::GraphicsObjectManager.
     */
    struct GLObjectReleaser : public osg::GraphicsObjectManager
    {
        GLObjectReleaser(unsigned contextID);

        //! Registers a GL object to be tracked (for the context of the
        //! given state, presumably -- implementation not in this header)
        static void watch(GLObject::Ptr, osg::State&);

        //! Releases everything tracked for the given state
        static void releaseAll(osg::State&);

        // osg::GraphicsObjectManager interface
        virtual void flushDeletedGLObjects(double currentTime, double& availableTime);
        virtual void flushAllDeletedGLObjects();
        virtual void deleteAllGLObjects();
        virtual void discardAllGLObjects();

        //! Objects currently being tracked
        std::set<GLObject::Ptr> _objects;
        //! Scratch set (presumably used while flushing -- confirm)
        std::set<GLObject::Ptr> _temp;
    };

#else // legacy implementation for OSG 3.4
    //! Legacy variant implemented as a graphics operation.
    //! NOTE(review): this branch stores GLObject in osg::ref_ptr and takes a
    //! raw GLObject*, while the other branch uses GLObject::Ptr -- confirm
    //! this path still builds.
    struct GLObjectReleaser : public osg::GraphicsOperation
    {
        GLObjectReleaser(unsigned contextID);

        static void watch(GLObject*, osg::State&);

        static void releaseAll(osg::State&);

        void operator()(osg::GraphicsContext*);

        std::set<osg::ref_ptr<GLObject> > _objects;
        std::set<osg::ref_ptr<GLObject> > _temp;

        //! One releaser per graphics context
        static osg::buffered_object<osg::ref_ptr<GLObjectReleaser> > _buf;
    };
#endif

    //! Base class for a shader storage buffer object
    class OSGEARTH_EXPORT SSBO
    {
    public:
        SSBO();

        //! Underlying GL buffer; null until one has been created
        mutable GLBuffer::Ptr _buffer;
        //! Size recorded when the buffer was last (re)allocated
        mutable GLsizei _allocatedSize;
        //! NOTE(review): not referenced in this header; meaning to be
        //! confirmed against the implementation
        mutable GLsizei _requiredSize;
        //! Binding index used for this buffer
        GLint _bindingIndex;

        //! Binds the buffer for shader access (implementation not shown here)
        void bindLayout() const;

        //! Releases the buffer
        virtual void release() const;

        //! True when a GL buffer is currently attached
        inline bool valid() const { return _buffer != nullptr; }
    };

    template<typename T>
    class ArraySSBO
    {
    public:
        ArraySSBO() : _buf(nullptr), _numElements(0), _bindingIndex(0)
        {
            _dirty.setAllElementsTo(false);
        }

        virtual ~ArraySSBO()
        {
            for (unsigned i=0; i<_ssbo.size(); ++i)
                _ssbo[i].release();
            if (_buf)
                delete[] _buf;
        }

        //! Sets the number of elements this object can hold
        void setNumElements(unsigned value)
        {
            if (value > _numElements)
            {
                _numElements = value;
                if (_buf) delete[] _buf;
                _buf = new T[_numElements];
                _dirty.setAllElementsTo(true);
            }
        }

        void setBindingIndex(unsigned value)
        {
            _bindingIndex = value;
        }

        T& operator[](int index)
        {
            return _buf[index];
        }

        void dirty()
        {
            _dirty.setAllElementsTo(1);
        }

        void apply(osg::State& state) const
        {
            if (_numElements == 0)
                return;

            bool bound = false;

            int i = state.getContextID();

            SSBO& ssbo = _ssbo[i];

            GLsizei contentSize = _numElements * sizeof(T);

            if (ssbo._allocatedSize < contentSize)
            {
                // reallocate the SSBO
                GLsizei alignedAllocationSize = align(contentSize, GLUtils::getSSBOAlignment(state));
                ssbo.release();
                ssbo._bindingIndex = _bindingIndex;
                ssbo._allocatedSize = contentSize;
                ssbo._buffer = GLBuffer::create(GL_SHADER_STORAGE_BUFFER, state, typeid(*this).name());
                ssbo._buffer->bind();
                bound = true;
                ssbo._buffer->ext()->glBufferStorage(GL_SHADER_STORAGE_BUFFER, alignedAllocationSize, nullptr, GL_DYNAMIC_STORAGE_BIT);
                _dirty[i] = 1;
            }

            if (_dirty[i])
            {
                if (!bound) ssbo._buffer->bind();
                ssbo._buffer->ext()->glBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, contentSize, (const GLvoid*)_buf);
                _dirty[i] = 0;
            }

            ssbo.bindLayout();
        }

        void resizeGLObjectBuffers(unsigned maxSize)
        {
            _ssbo.resize(maxSize);
            _dirty.resize(maxSize);
            _dirty.setAllElementsTo(1);
        }

        void releaseGLObjects(osg::State* state) const
        {
            if (state)
            {
                _ssbo[state->getContextID()].release();
            }
            else
            {
                for (int i = 0; i < _ssbo.size(); ++i)
                {
                    _ssbo[i].release();
                }
            }
        }

    private:
        mutable osg::buffered_object<SSBO> _ssbo;
        mutable osg::buffered_object<int> _dirty;
        T* _buf;
        unsigned _numElements;
        unsigned _bindingIndex;
    };

    // Functions that aren't in osg::GLExtensions (some in 3.4, some in 3.6+)
    // One instance exists per graphics context ID; use get() to obtain it.
    struct OSGEARTH_EXPORT GLFunctions
    {
        //! Function table for a graphics context ID
        static GLFunctions& get(unsigned contextID);

        //! Function table for the context of the given state
        static GLFunctions& get(osg::State& state) { return get(state.getContextID()); }

        GLFunctions();

        // Entry points; parameter lists mirror the OpenGL prototypes.
        // glBufferStorage: (target, size, data, flags). The size parameter is
        // pointer-sized in the GL API, so it is declared as GLsizeiptr here;
        // a 32-bit GLuint would not match the real function's signature.
        void (GL_APIENTRY * glBufferStorage)(GLenum, GLsizeiptr, const void*, GLenum);
        void (GL_APIENTRY * glClearBufferSubData)(GLenum, GLenum, GLintptr, GLsizeiptr, GLenum, GLenum type, const void*);
        void (GL_APIENTRY * glMultiDrawElementsIndirect)(GLenum, GLenum, const void*, GLsizei, GLsizei);
        void (GL_APIENTRY * glDispatchComputeIndirect)(GLintptr);
        void (GL_APIENTRY * glTexStorage3D)(GLenum, GLsizei, GLenum, GLsizei, GLsizei, GLsizei);


    private:
        // Fixed-size table indexed by context ID
        static GLFunctions _buf[256];
    };

    /**
     * API for launching GPU thread jobs. A function dispatched through this
     * class executes on the OSG graphics thread and delivers its return
     * value through a future.
     *
     * NOTE: This implementation will run the job under an arbitrary graphics
     * context, so it is not currently suitable for operations that must be
     * executed on multiple contexts.
     *
     * Example usage (graphics thread operation returning a bool):
     *
     *  // Dispatch the asynchronous job:
     *  GPUJob<bool>::Result result = GPUJob<bool>::dispatch(
     *      [=](osg::State* state, Cancelable* progress)
     *      {
     *           // do something
     *           return true;
     *      }
     *  );
     *
     *  // Block until the result is ready:
     *  bool value = result.get();
     */
    template<typename RESULT_TYPE>
    class GPUJob
    {
    public:
        //! Future that will eventually contain the job's return value
        using Result = Future<RESULT_TYPE>;

        //! Signature of an asynchronous job function
        using Function = std::function<RESULT_TYPE(osg::State*, Cancelable*)>;

        //! Dispatch the asynchronous function.
        //! @param function Function to execute in the graphics thread
        //! @return Future result value. If this object goes out of scope,
        //!   the job may be canceled.
        static Result dispatch(const Function& function);
    };

    /**
     * Arena for dispatching GPU jobs.
     * Use the GPUJob<> API instead of accessing this directly.
     * The arena is itself an osg::GraphicsOperation; queued delegates are
     * handed an osg::State when they run.
     */
    class OSGEARTH_EXPORT GPUJobArena : public osg::GraphicsOperation
    {
    public:
        //! A queued unit of work
        typedef std::function<void(osg::State*)> Delegate;

    public:
        //! Attach the arena to a graphics context
        void setGraphicsContext(osg::GraphicsContext*);
        //! Graphics context the arena is attached to (may be null)
        osg::ref_ptr<osg::GraphicsContext> getGraphicsContext();

        //! Time (ms) allocated to running GPU jobs per frame
        //! Default = 0, meaning one job per frame is allowed.
        void setTimeSlice(const std::chrono::milliseconds& value);
        const std::chrono::milliseconds& getTimeSlice() const;

        //! Dispatch a GPU job delegate
        //! Use GPUJob::dispatch instead of calling this directly
        void dispatch(Delegate& job);

        //! Access to the job arena
        static GPUJobArena& arena();

        //! Jobs in queue
        std::size_t size() const;

    public:

        //! osg::GraphicsOperation entry point
        void operator()(osg::GraphicsContext*) override;

    protected:

        // protected destructor: instances are reference counted
        // (see the osg::ref_ptr in _arena_pool)
        virtual ~GPUJobArena();

    private:
        // private constructor: obtain the instance through arena()
        GPUJobArena();

        mutable Mutex _queue_mutex;                            // presumably guards _queue
        std::deque<Delegate> _queue;                           // pending delegates
        mutable osg::observer_ptr<osg::GraphicsContext> _gc;   // non-owning reference to the context
        std::chrono::milliseconds _timeSlice;                  // see setTimeSlice()
        bool _done;
        static osg::ref_ptr<GPUJobArena> _arena_pool;          // instance returned by arena(), presumably
        static Mutex _arena_pool_mutex;
    };

    /**
     * Activates a GPUJobArena - place anywhere in the scene graph.
     * Implemented as a drawable, so it gets a RenderInfo when it is drawn.
     */
    class OSGEARTH_EXPORT GPUJobArenaConnector : public osg::Drawable
    {
    public:
        GPUJobArenaConnector();

        //! osg::Drawable draw hook
        void drawImplementation(osg::RenderInfo& ri) const override;
    protected:
        // protected destructor: OSG objects are reference counted
        virtual ~GPUJobArenaConnector();
    };

    //! Queues the function on the GPU job arena and returns the future
    //! through which its result will be delivered.
    //! @param function Function to execute in the graphics thread
    //! @return Future that receives the function's return value
    template<typename RESULT_TYPE>
    Future<RESULT_TYPE> GPUJob<RESULT_TYPE>::dispatch(const Function& function)
    {
        Promise<RESULT_TYPE> promise;
        Future<RESULT_TYPE> future = promise.getFuture();

        // The delegate keeps its own copies of the function and the promise,
        // so both stay valid until the arena runs the job. It is 'mutable'
        // because resolving the promise modifies the captured copy.
        GPUJobArena::Delegate delegate = [function, promise](osg::State* state) mutable
        {
            // skip the work when the promise reports it has been abandoned
            if (!promise.isAbandoned())
            {
                // the promise also serves as the job's Cancelable
                promise.resolve(function(state, &promise));
            }
        };
        GPUJobArena::arena().dispatch(delegate);

        // Return the local by name: it is moved (or elided) automatically,
        // whereas an explicit std::move would prevent copy elision.
        return future;
    }

    /**
     * Base class for a GPU Compute job that generates or modifies
     * a raster image on the GPU and reads it back to the CPU.
     * The name of the image in the compute shader is "buf" and it
     * is bound to layout location zero (0).
     *
     * Subclasses supply the GPU work by implementing renderImplementation().
     * NOTE(review): the class is polymorphic but declares no virtual
     * destructor; confirm instances are never deleted through a base pointer.
     */
    class OSGEARTH_EXPORT ComputeImageSession
    {
    public:
        //! construct a new session
        ComputeImageSession();

        //! Sets the compute shader program to use
        void setProgram(osg::Program* program);

        //! Sets the image to submit to the compute shader
        void setImage(osg::Image* image);

        //! Runs the compute shader and waits for the result
        //! to be read back to the CPU.
        void execute();

    protected:
        //! Image set through setImage()
        osg::ref_ptr<osg::Image> _image;
        //! State set available to subclasses
        osg::ref_ptr<osg::StateSet> _stateSet;

        //! Subclass hook that performs the GPU work for this session
        virtual void renderImplementation(osg::State* state) = 0;

    private:
        GLuint _pbo;            // GL object name; presumably the pixel buffer used for readback
        osg::Texture2D* _tex;   // raw pointer; ownership is not visible in this header

        void render(osg::State* state);
        void readback(osg::State* state);
    };

    template<typename T>
    class PerThreadComputeSession
    {
    public:
        T& get(osg::Program* program) const {
            ScopedMutexLock lock(_sessions);
            SessionPtr& ptr = _sessions[getCurrentThreadId()];
            if (ptr == nullptr) {
                ptr = std::make_shared<T>();
                ptr->setProgram(program);
            }
            return *ptr.get();
        }
    private:
        using SessionPtr = std::shared_ptr<T>;
        using SessionPtrPerThread = Mutexed<std::unordered_map<unsigned, SessionPtr>>;
        mutable SessionPtrPerThread _sessions;
    };

    /**
     * Utility to "pre-compile" a node by running it through the ICO
     * if one exists in the Options. If there is no ICO, this is a no-op
     */
    class OSGEARTH_EXPORT GLObjectsCompiler
    {
    public:
        //! Analyze the node and collect the compilable state
        osg::ref_ptr<osgUtil::StateToCompile> collectState(
            osg::Node* node) const;

        //! Compiles the node's GL objects.
        //! @param node Node to compile
        //! @param host Object associated with the request (presumably where
        //!   the ICO is looked up -- confirm against the implementation)
        //! @param progress Cancelation interface
        void compileNow(
            const osg::ref_ptr<osg::Node>& node,
            const osg::Object* host,
            osgEarth::Threading::Cancelable* progress) const;

        //! Asynchronous variant of compileNow.
        //! @return Future holding a node reference
        Future<osg::ref_ptr<osg::Node>> compileAsync(
            const osg::ref_ptr<osg::Node>& node,
            const osg::Object* host,
            osgEarth::Threading::Cancelable* progress) const;

        //! Asynchronous variant that takes already-collected state
        //! (see collectState) instead of analyzing the node itself.
        Future<osg::ref_ptr<osg::Node>> compileAsync(
            const osg::ref_ptr<osg::Node>& node,
            osgUtil::StateToCompile* state,
            const osg::Object* host,
            osgEarth::Threading::Cancelable* progress) const;

        //! Current value of the active-jobs counter
        static int totalJobs() { return (int)_jobsActive; }

    private:
        //! Atomic counter shared by all instances
        static std::atomic_int _jobsActive;
    };
}

#endif // OSGEARTH_GLUTILS_H
