/*******************************************************************************
* Copyright 2016-2020 Intel Corporation.
*
* This software and the related documents are Intel copyrighted  materials,  and
* your use of  them is  governed by the  express license  under which  they were
* provided to you (License).  Unless the License provides otherwise, you may not
* use, modify, copy, publish, distribute,  disclose or transmit this software or
* the related documents without Intel's prior written permission.
*
* This software and the related documents  are provided as  is,  with no express
* or implied  warranties,  other  than those  that are  expressly stated  in the
* License.
*******************************************************************************/

#if !defined( __IW_TILING_IWPP__ )
#define __IW_TILING_IWPP__

#include "iw_tiling_base.h"
#include "iw++/iw.hpp"


// Per-thread working state of the tiled pipeline. Instances are created and looked up through
// thread-local storage (see TilingIW::m_tls), so every thread operates on its own copy.
struct TilingTlsData
{
    // Chain of tile nodes, one per pipeline stage; TilingIW uses indexes 0..5 in stage order.
    ipp::IwiTilePipeline tile[6];

    // Pair of intermediate buffers that the filter stages read from and write to alternately.
    Image inter_32f[2];

    // Intermediate buffer that receives the output of the color conversion stage.
    Image inter_8u;

    // Temporary work buffer for ippiFilterSharpenBorder.
    AutoBuffer<Ipp8u> sharpBuffer;
};

// Tiled implementation of the example image pipeline built on the IW tile-pipeline API.
// Stages, in execution order: color conversion, scale 8u->32f, Gaussian filter, Sobel filter,
// sharpen filter (plain Intel IPP call used with IW tiling) and scale 32f->8u.
// Each thread keeps its own tile chain and intermediate buffers in m_tls.
class TilingIW: public TilingBase
{
public:
    // Creates the object and passes the requested parallel interface to SetParallelInterface().
    TilingIW(ParallelInterface parallel = PARALLEL_NONE)
    {
        SetParallelInterface(parallel);
    }
    virtual ~TilingIW() {}

    // Releases the thread-local pipeline data of all threads.
    virtual void Release()
    {
        m_tls.ReleaseAll();
    }

    // Prepares the source and destination images for processing.
    // src is converted in place to m_srcColor / m_srcType; dst is allocated with the size of src
    // and m_dstColor / m_dstType. Always returns STS_OK.
    Status InitBuffers(Image &src, Image &dst)
    {
        src.ConvertColor(m_srcColor);
        src.ConvertSamples(m_srcType);

        dst.Alloc(src.m_size, m_dstColor, m_dstType);

        return STS_OK;
    }

    // Returns the minimal tile size reported by the first pipeline node (tile[0]) of the calling thread.
    // Returns a default-constructed Size if the calling thread has no pipeline data yet.
    Size GetMinTile()
    {
        TilingTlsData *pData = m_tls.Get();
        if(pData)
            return IppSizeToImage(pData->tile[0].GetMinTileSize());
        else
            return Size();
    }

    // Initializes the whole object for a new src/dst pair and maximum tile size.
    // Drops pipeline data of all threads, derives border sizes from the filter masks, stores maxTile and
    // builds the pipeline for the calling thread. Other threads build their pipeline lazily in Run().
    // Returns STS_OK on success; a failing InitForThread() status is handled by CHECK_STATUS_PRINT_RS
    // (presumably printed and returned - the macro is defined outside this file).
    Status Init(Image &src, Image &dst, Size maxTile)
    {
        Release();

        InitExternal(src, dst);

        // Convert filter masks to borders size
        m_sobelBorderSize = ipp::iwiSizeToBorderSize(ipp::iwiMaskToSize(m_sobelMask));
        m_sharpBorderSize = ipp::iwiSizeToBorderSize(ipp::iwiMaskToSize(m_sharpMask));
        m_gaussBorderSize = ipp::iwiSizeToBorderSize(m_gaussMask);

        m_maxTile = maxTile;

        Status status = InitForThread(src, dst);
        CHECK_STATUS_PRINT_RS(status, "InitForThread", GetBaseStatusString(status));

        return STS_OK;
    }

    // Builds the tile chain and intermediate buffers for the calling thread if they do not exist yet.
    // The source image parameter is unused; only the destination size is needed to set up the chain.
    // Returns:
    //   STS_OK                 - data already exists for this thread or was created successfully
    //   STS_ERR_ALLOC          - thread-local data could not be created
    //   STS_ERR_INVALID_PARAMS - m_maxTile is smaller than the minimal tile size of the pipeline
    //   STS_ERR_FAILED         - an IW exception with an error status was raised during initialization
    //
    // NOTE(review): on the failure paths below the entry created by m_tls.Create() stays registered, so
    // a later call on the same thread finds it and returns STS_OK without re-initializing. Init() is safe
    // because it calls Release() first; callers should not call Run() after a failure on the same thread.
    // GetMinTile() still reads this entry after a "tile too small" failure - confirm whether callers
    // rely on that before changing it.
    Status InitForThread(Image &, Image &dst)
    {
        TilingTlsData *pData = m_tls.Get();

        if(!pData)
        {
            pData = m_tls.Create();
            if(!pData)
                return STS_ERR_ALLOC;

            try
            {
                // If we have a complex pipeline with several functions which have borders and we want to process the whole
                // pipeline by tiles, we need to make sure that the tile for each function is positioned with precise offset
                // and size according to the requirements of the previous function parameters.
                // iwiTilePipeline functions create a chain of ROIs to automatically track dependencies between tiles for
                // different functions in the pipeline.
                //
                // Advanced tiles initialization must be performed in reverse order: from dst to src. Tiles are navigated
                // relative to the final dst image.
                //
                // This function will initialize Tile for the final operation: iwiScale 32f->8u
                pData->tile[5].Init(ipp::IwiSize((IwSize)m_maxTile.width, (IwSize)m_maxTile.height), ipp::IwiSize((IwSize)dst.m_size.width, (IwSize)dst.m_size.height));

                // Initialize Tile for the ippiFilterSharpenBorder
                pData->tile[4].InitChild(pData->tile[5], m_border, m_sharpBorderSize);

                // Initialize Tile for the iwiFilterSobel
                pData->tile[3].InitChild(pData->tile[4], m_border, m_sobelBorderSize);

                // Initialize Tile for the iwiFilterGaussian
                pData->tile[2].InitChild(pData->tile[3], m_border, m_gaussBorderSize);

                // Initialize Tile for the iwiScale 8u->32f
                pData->tile[1].InitChild(pData->tile[2]);

                // Initialize Tile for the iwiColorConvert
                pData->tile[0].InitChild(pData->tile[1]);

                // The requested tile must be able to hold the minimal tile required by the whole chain
                ipp::IwiSize minTile = pData->tile[0].GetMinTileSize();
                if(m_maxTile.width < minTile.width || m_maxTile.height < minTile.height)
                {
                    PRINT_MESSAGE("Tile size is too small for the pipeline");
                    return STS_ERR_INVALID_PARAMS;
                }

                // Allocate intermediate buffers
                pData->inter_8u.Alloc(IppSizeToImage(pData->tile[0].GetDstBufferSize()), m_dstColor, m_srcType);
                // We can use only two buffers with swapping since they have similar sizes. Use tile[1] dst size since it should be the biggest.
                pData->inter_32f[0].Alloc(IppSizeToImage(pData->tile[1].GetDstBufferSize()), m_dstColor, m_interType);
                pData->inter_32f[1].Alloc(IppSizeToImage(pData->tile[1].GetDstBufferSize()), m_dstColor, m_interType);
            }
            // Catch by const reference to avoid copying the exception object
            catch(const ipp::IwException &ex)
            {
                CHECK_STATUS_PRINT_AC(ex.m_status, "catch(IwException)", iwGetStatusString(ex.m_status), return STS_ERR_FAILED);
            }
        }

        return STS_OK;
    }

    // Processes one tile of the destination image through the whole pipeline.
    // src and dst are the full images; tile is the rectangle to produce, relative to dst.
    // The pipeline data for the calling thread is created on first use.
    // Returns STS_OK on success, STS_ERR_ALLOC if the sharpen work buffer cannot be allocated and
    // STS_ERR_FAILED on missing thread data or on an Intel IPP / IW error status.
    virtual Status Run(Image &src, Image &dst, Rect tile)
    {
        Status status = InitForThread(src, dst);
        CHECK_STATUS_PRINT_RS(status, "InitForThread", GetBaseStatusString(status));

        try
        {
            TilingTlsData *pData = m_tls.Get();
            if(!pData)
                return STS_ERR_FAILED;

            ipp::IwiRoi      ippTile((IwSize)tile.x, (IwSize)tile.y, (IwSize)tile.width, (IwSize)tile.height);
            ipp::IwiColorFmt ippSrcColor  = ImageColorToIpp(m_srcColor);
            ipp::IwiColorFmt ippDstColor  = ImageColorToIpp(m_dstColor);

            // IW image descriptors for the external images and for the per-thread intermediate buffers
            ipp::IwiImage iwSrc = ImageToIwImage(src);
            ipp::IwiImage iwDst = ImageToIwImage(dst);
            ipp::IwiImage iwInter_8u = ImageToIwImage(pData->inter_8u);
            ipp::IwiImage iwInter[2] = {
                ImageToIwImage(pData->inter_32f[0]),
                ImageToIwImage(pData->inter_32f[1])
            };

            // Set Tile for ROI chain
            // This function updates ROIs in chain to align according to current tile coordinates and size.
            // SetTile method can be called only once for particular tile and for any ROI in the chain. It will
            // automatically find top ROI in chain and propagate tile parameters correctly.
            pData->tile[5].SetTile(ippTile);

            // Stage 1. Color conversion
            ipp::iwiColorConvert(iwSrc, ippSrcColor, iwInter_8u, ippDstColor, IwValueMax, ipp::IwDefault(), pData->tile[0]);

            // Stage 2. Scaling 8u->32f
            // Scale factors are filled in by iwiScale_GetScaleVals before every use.
            double mul = 1.0;
            double add = 0.0;
            ipp::iwiScale_GetScaleVals(iwInter_8u.m_dataType, iwInter[0].m_dataType, mul, add);
            ipp::iwiScale(iwInter_8u, iwInter[0], mul, add, ipp::IwDefault(), pData->tile[1]);

            // Stage 3. Gaussian filter
            ipp::iwiFilterGaussian(iwInter[0], iwInter[1], m_gaussMask, 1, ipp::IwDefault(), m_border, pData->tile[2]);

            // Stage 4. Sobel filter
            ipp::iwiFilterSobel(iwInter[1], iwInter[0], m_sobelType, m_sobelMask, ipp::IwDefault(), m_border, pData->tile[3]);

            // Stage 5. Sharpening.
            // This Intel IPP function is given here as an example of usage of any non-IW function with advanced tiling.
            // To use advanced tiling with non-IW function some manual steps must be taken.
            // 1. IwiTile functions assume that IW function will make buffer boundary check, so we need to perform such
            //    check to prevent Out Of Buffer access.
            // 2. IW function shifts buffer according to local buffer offset in IwiTile
            // 3. Border flags must be correctly set for functions with borders according to current local and absolute
            //    positions
            // 4. Border should be reconstructed according to the current tile position.
            // There are special functions which allow to perform all these steps just like in IW functions.
            {
                IppStatus          ippStatus;
                ipp::IwiBorderType sharpBorder;
                IppiSize           sharpSize;

                // Step 1: Create local sub-images according to current tile parameters.
                ipp::IwiImage sharpSrc = iwInter[0].GetRoiImage(pData->tile[4].GetBoundedSrcRoi());
                ipp::IwiImage sharpDst = iwInter[1].GetRoiImage(pData->tile[4].GetBoundedDstRoi());

                // Step 2: Update inMem flags in border variable according to current tile position
                // These border values are for Intel IPP functions, but for non-Intel IPP functions it should be similar, if they
                // support manual borders memory type parameters. You can check border for specific flags
                // (e.g.: border&ippBorderInMemLeft) and convert them into flags suitable for your functionality.
                sharpBorder = pData->tile[4].GetTileBorder(m_border);

                // Step 3: Border reconstruction. This function will build border for intermediate steps to make
                // image borders "transparent" for the function.
                pData->tile[4].BuildBorder(sharpSrc, sharpBorder);

                // Since src and dst buffers may differ, get minimal size.
                sharpSize.width  = (int)IPP_MIN(sharpSrc.m_size.width,  sharpDst.m_size.width);
                sharpSize.height = (int)IPP_MIN(sharpSrc.m_size.height, sharpDst.m_size.height);

                // Allocate buffer for the function.
                {
                    int bufferSize;

                    ippStatus = ippiFilterSharpenBorderGetBufferSize(sharpSize, m_sharpMask, sharpSrc.m_dataType, sharpDst.m_dataType, sharpSrc.m_channels, &bufferSize);
                    CHECK_STATUS_PRINT_AC(ippStatus, "ippiFilterSharpenBorderGetBufferSize()", ippGetStatusString(ippStatus), return STS_ERR_FAILED);

                    // The per-thread buffer is reused between tiles and only reallocated when a bigger one is required
                    if(bufferSize && bufferSize > (int)pData->sharpBuffer.GetSize())
                    {
                        pData->sharpBuffer.Alloc(bufferSize);
                        if(!pData->sharpBuffer)
                        {
                            PRINT_MESSAGE("Cannot allocate memory for ippiFilterSharpenBorder_32f_C1R");
                            return STS_ERR_ALLOC;
                        }
                    }
                }

                // NOTE(review): the 32f_C1R flavor is hard-coded here while the buffer size query above uses the
                // actual data type and channels of the intermediate images - confirm they always match.
                ippStatus = ippiFilterSharpenBorder_32f_C1R((Ipp32f*)sharpSrc.ptr(), (int)sharpSrc.m_step, (Ipp32f*)sharpDst.ptr(), (int)sharpDst.m_step, sharpSize, m_sharpMask, sharpBorder, 0, pData->sharpBuffer);
                CHECK_STATUS_PRINT_AC(ippStatus, "ippiFilterSharpenBorder_32f_C1R()", ippGetStatusString(ippStatus), return STS_ERR_FAILED);
            }

            // Stage 6. Scaling 32f->8u
            ipp::iwiScale_GetScaleVals(iwInter[1].m_dataType, iwDst.m_dataType, mul, add);
            ipp::iwiScale(iwInter[1], iwDst, mul, add, ipp::IwDefault(), pData->tile[5]);
        }
        // Catch by const reference to avoid copying the exception object
        catch(const ipp::IwException &ex)
        {
            CHECK_STATUS_PRINT_AC(ex.m_status, "catch(IwException)", iwGetStatusString(ex.m_status), return STS_ERR_FAILED);
        }

        return STS_OK;
    }

public:
    ipp::IwTls<TilingTlsData> m_tls;    // Thread-local pipeline data, one TilingTlsData instance per thread

    Size               m_maxTile;           // Maximum tile size passed to Init()
    ipp::IwiBorderSize m_sobelBorderSize;   // Border size derived from m_sobelMask
    ipp::IwiBorderSize m_gaussBorderSize;   // Border size derived from m_gaussMask
    ipp::IwiBorderSize m_sharpBorderSize;   // Border size derived from m_sharpMask
};

#endif
