/*******************************************************************************
* Copyright 2016-2020 Intel Corporation.
*
* This software and the related documents are Intel copyrighted  materials,  and
* your use of  them is  governed by the  express license  under which  they were
* provided to you (License).  Unless the License provides otherwise, you may not
* use, modify, copy, publish, distribute,  disclose or transmit this software or
* the related documents without Intel's prior written permission.
*
* This software and the related documents  are provided as  is,  with no express
* or implied  warranties,  other  than those  that are  expressly stated  in the
* License.
*******************************************************************************/

#include "pifsobel_t.h"

/* /////////////////////////////////////////////////////////////////////////////
//  Name:               ippiFilterSobelGetBufferSize_T
//
//  Purpose:            Computes the size of the external buffer and spec for Sobel operator for a pipeline version.
//
//  Parameters:
//   roiSize            Size of destination ROI in pixels.
//   maskId             Predefined mask of IppiMaskSize type.
//   normType           Normalization mode of IppNormType type.
//   srcDataType        Data type of the source image.
//   dstDataType        Data type of the destination image.
//   numChannels        Number of channels in the images. The only possible value is 1.
//   pSpecSize          Pointer to the size (in bytes) of the spec.
//   pBufferSize        Pointer to the size (in bytes) of the external work buffer.
//
//  Return Values:
//   ippStsNoErr          Indicates no error.
//   ippStsNullPtrErr     Indicates an error when pSpecSize or pBufferSize is NULL.
//   ippStsSizeErr        Indicates an error when roiSize is negative, or equal to zero.
//   ippStsMaskSizeErr    Indicates an error condition if mask has a wrong value.
//   ippStsNormErr        Indicates an error condition if normType is not ippNormL2.
//   ippStsDataTypeErr    Indicates an error when srcDataType or dstDataType has an illegal value.
//   ippStsNumChannelsErr Indicates an error when numChannels has an illegal value.
*/
IPPFUN(IppStatus, ippiFilterSobelGetBufferSize_T, (IppiSize roiSize, IppiMaskSize maskId, IppNormType normType, IppDataType srcDataType,
                                                   IppDataType dstDataType, int numChannels, int *pSpecSize, int *pBufferSize))
{
    /*
     * Reports the spec size and the size of the work buffer shared by all threads.
     * The work buffer is the per-thread amount (the same expression that
     * ippiFilterSobelInit_T stores as sliceBufferSize) multiplied by the number of threads.
     * Outputs are written only when every step returned ippStsNoErr.
     */
    IppStatus status = ippStsNoErr;

    if (pSpecSize == 0 || pBufferSize == 0) return ippStsNullPtrErr;
    if (roiSize.width <= 0 || roiSize.height <= 0) return ippStsSizeErr;
    if (dstDataType != ipp16s && dstDataType != ipp32f) return ippStsDataTypeErr;
    /* L1 norm not implemented in the pipeline */
    if (normType != ippNormL2) return ippStsNormErr;

    /* Default to a single thread so the value is defined even if the query below does not write it */
    Ipp32s numThreads = 1;
    IppiSize sliceSize, lastSliceSize;
    IppiPoint splitImage;

    int sizeofDstType = (dstDataType == ipp16s) ? sizeof(Ipp16s) : sizeof(Ipp32f);
    int bufferSizeH   = 0;
    int bufferSizeV   = 0;

    /* Same starting state as ippiFilterSobelInit_T uses before asking for the slice layout */
    splitImage.x = splitImage.y = 0;

    ippGetNumThreads_T(&numThreads);
    /* A non-positive thread count would produce a zero or negative buffer size */
    if (numThreads < 1) numThreads = 1;

    ownGetSobelSliceSize(roiSize, maskId, &sliceSize, &lastSliceSize, &splitImage);

    /* Size everything for the larger of the regular and the last slice */
    IppiSize maxSliceSize = {IPP_MAX(sliceSize.width, lastSliceSize.width), IPP_MAX(sliceSize.height, lastSliceSize.height)};

    status = ippiFilterSobelHorizBorderGetBufferSize(maxSliceSize, maskId, srcDataType, dstDataType, numChannels, &bufferSizeH);
    if (status != ippStsNoErr) return status;

    status = ippiFilterSobelVertBorderGetBufferSize(maxSliceSize, maskId, srcDataType, dstDataType, numChannels, &bufferSizeV);
    /* Same success criterion as for the horizontal query above */
    if (status != ippStsNoErr) return status;

    *pSpecSize = ALIGNED_SIZE(sizeof(SobelInfo));

    /* Per-slice storage for the intermediate result of the horizontal filter */
    int maxSliceBufferSize = maxSliceSize.width * sizeofDstType * maxSliceSize.height;

    /*
     * Per thread: filter work buffer (horizontal and vertical share it, hence the max),
     * the intermediate slice image, and 4 * width * sizeofDstType bytes of line storage.
     * NOTE(review): the compute kernel applies ALIGNED_PTR to each thread's sub-buffer start;
     * confirm that this total leaves enough slack for that round-up.
     */
    *pBufferSize = (IPP_MAX(bufferSizeH, bufferSizeV) + ALIGNED_SIZE(maxSliceBufferSize) + 4 * maxSliceSize.width * sizeofDstType) * numThreads;

    return status;
}

/* /////////////////////////////////////////////////////////////////////////////
//  Name:               ippiFilterSobelInit_T
//
//  Purpose:            Initialize Sobel Filter spec structure
//
//  Parameters:
//   roiSize            Size of destination ROI in pixels.
//   maskId             Predefined mask of IppiMaskSize type.
//   normType           Normalization mode of IppNormType type.
//   srcDataType        Data type of the source image.
//   dstDataType        Data type of the destination image.
//   numChannels        Number of channels in the images.
//   pSpec              Pointer to the spec structure
//
//  Return Values:
//   ippStsNoErr          Indicates no error.
//   ippStsNullPtrErr     Indicates an error when pSpec is NULL.
//   ippStsSizeErr        Indicates an error when roiSize is negative, or equal to zero.
//   ippStsMaskSizeErr    Indicates an error condition if mask has a wrong value.
//   ippStsNormErr        Indicates an error condition if normType is not ippNormL2.
//   ippStsDataTypeErr    Indicates an error when srcDataType or dstDataType has an illegal value.
//   ippStsNumChannelsErr Indicates an error when numChannels has an illegal value.
*/
IPPFUN(IppStatus, ippiFilterSobelInit_T, (IppiSize roiSize, IppiMaskSize maskId, IppNormType normType, IppDataType srcDataType,
                                          IppDataType dstDataType, int numChannels, IppiFilterSobelSpec_T *pSpec))
{
    /* Argument checks: spec pointer, ROI, destination type, norm (in this order) */
    if (pSpec == 0)
        return ippStsNullPtrErr;
    if (roiSize.width <= 0 || roiSize.height <= 0)
        return ippStsSizeErr;
    if (dstDataType != ipp16s && dstDataType != ipp32f)
        return ippStsDataTypeErr;
    if (normType != ippNormL2)
        return ippStsNormErr; /* L1 norm not implemented in the pipeline */

    /* Ask for the slice layout of the ROI */
    IppiSize  regularSlice, trailingSlice;
    IppiPoint grid;

    grid.x = 0;
    grid.y = 0;
    ownGetSobelSliceSize(roiSize, maskId, &regularSlice, &trailingSlice, &grid);

    /* All sizes below are computed for the larger of the regular and the last slice */
    IppiSize largestSlice = {IPP_MAX(regularSlice.width, trailingSlice.width), IPP_MAX(regularSlice.height, trailingSlice.height)};

    int horizWorkSize = 0;
    IppStatus status = ippiFilterSobelHorizBorderGetBufferSize(largestSlice, maskId, srcDataType, dstDataType, numChannels, &horizWorkSize);
    if (status != ippStsNoErr)
        return status;

    int vertWorkSize = 0;
    status = ippiFilterSobelVertBorderGetBufferSize(largestSlice, maskId, srcDataType, dstDataType, numChannels, &vertWorkSize);
    if (status != ippStsNoErr)
        return status;

    /* Bytes per destination element */
    int dstElemSize;
    if (dstDataType == ipp16s)
        dstElemSize = sizeof(Ipp16s);
    else
        dstElemSize = sizeof(Ipp32f);

    /* Per-slice storage for the intermediate result of the horizontal filter */
    int sliceImageBytes = largestSlice.width * dstElemSize * largestSlice.height;

    /* The spec memory is accessed through an aligned SobelInfo pointer */
    SobelInfo *pInfo = (SobelInfo*)ALIGNED_PTR(pSpec);

    /* Problem description */
    pInfo->roiSize                = roiSize;
    pInfo->maskId                 = maskId;
    pInfo->normType               = normType;

    /* Slice layout */
    pInfo->splitImage.x           = grid.x;
    pInfo->splitImage.y           = grid.y;
    pInfo->sliceSize.width        = regularSlice.width;
    pInfo->sliceSize.height       = regularSlice.height;
    pInfo->lastSliceSize.width    = trailingSlice.width;
    pInfo->lastSliceSize.height   = trailingSlice.height;

    /* Per-thread buffer budget: filter work buffer + intermediate slice image + line storage */
    pInfo->sliceBufferSize        = IPP_MAX(horizWorkSize, vertWorkSize) + ALIGNED_SIZE(sliceImageBytes) + 4 * largestSlice.width * dstElemSize;
    pInfo->intermediateBufferSize = sliceImageBytes;
    pInfo->lineBufferSize         = largestSlice.width * 2 * dstElemSize; /* one line with elements twice as wide as the destination type */

    return status;
}

/* /////////////////////////////////////////////////////////////////////////////
//  Name:               ippiFilterSobel_8u16s_C1R_T_Fun
//
//  Purpose:            Kernel to be called in parallel_for of Threading Layer -
//                      it runs full Filter Sobel functions pipeline for particular slice of the image:
//
//                         ippiFilterSobelHorizBorder_8u16s_C1R
//                         ippiFilterSobelVertBorder_8u16s_C1R
//                         ippsMul_16s32s_Sfs
//                         ippsAdd_32s_Sfs
//                         ippsSqrt_32s16s_Sfs
//
//  Parameters:
//   t                  slice index (the thread index is queried inside the function)
//   arg                pointer to the Filter Sobel threading structure
//
//  Return Values:
//   ippStsNoErr            Indicates no error.
//   ippStsNullPtrErr       Indicates an error when pBufferSize is NULL.
//   ippStsSizeErr          Indicates an error when roiSize is negative, or equal to zero.
//   ippStsNotEvenStepErr   Indicates an error when one of the step values is not divisible by 4
//                          for floating-point images, or by 2 for short-integer images.
//   ippStsBorderErr        Indicates an error when borderType has illegal value.
//   ippStsSqrtNegArg       Indicates that source image pixel has a negative value
*/
static IppStatus ippiFilterSobel_8u16s_C1R_T_Fun(int t, void *arg)
{
    /*
     * Processes slice number t of the image described by the threading structure in arg:
     * runs the horizontal and the vertical Sobel filter on the slice and combines the two
     * results line by line as sqrt(h*h + v*v), written to the destination image.
     * Returns the first status that differs from ippStsNoErr, or ippStsNoErr.
     */
    IppStatus status        = ippStsNoErr;

    ippiFilterSobel_T_Str * ts = (ippiFilterSobel_T_Str *)arg;

    const Ipp8u *pSrc          = (const Ipp8u *)ts->pSrc;
    int srcStep                = ts->srcStep;
    Ipp16s *pDst               = ts->pDst;
    int dstStep                = ts->dstStep;
    IppiMaskSize maskId        = ts->maskId;
    IppiBorderType border      = ts->borderType;
    Ipp8u borderValue          = ts->borderValue;
    Ipp8u *pBuffer             = ts->pBuffer;
    int sliceBufferSize        = ts->sliceBufferSize;
    int intermediateBufferSize = ts->intermediateBufferSize;
    int lineBufferSize         = ts->lineBufferSize;
    IppiPoint splitImage       = ts->splitImage;
    IppiSize sliceSize         = ts->sliceSize;
    IppiSize lastSliceSize     = ts->lastSliceSize;

    Ipp8u *sliceBuffer         = 0;

    int tWidth  = sliceSize.width;
    int tHeight = sliceSize.height;

    IppiSize roiSize;
    int tx, ty; /* slice coordinates */
    IppiBorderType borderTrd  = border;
    IppiBorderType borderTrdW = borderTrd;

    /* The thread index selects this thread's private part of the work buffer */
    int threadIdx = 0;
    ippGetThreadIdx_T(&threadIdx);

    /* Slices are numbered row by row, splitImage.x slices per row */
    ty = t / splitImage.x;
    tx = t % splitImage.x;

    /* The last row / column of slices uses lastSliceSize when that dimension is non-zero */
    roiSize.height = tHeight;
    if (lastSliceSize.height && (ty == (int)(splitImage.y - 1)))
        roiSize.height = lastSliceSize.height;
    roiSize.width = tWidth;
    if (lastSliceSize.width && (tx == (int)(splitImage.x - 1)))
        roiSize.width = lastSliceSize.width;
    /* The intermediate image is stored densely: one slice line after another */
    int dstStepIntermediate = roiSize.width * sizeof(*pDst);

    /*
     * Layout of the per-thread buffer, starting at sliceBuffer:
     *   [intermediate slice image][32-bit line buffer][32-bit line buffer][filter work buffer]
     * NOTE(review): ALIGNED_PTR may move the start forward; confirm the buffer size reported by
     * ippiFilterSobelGetBufferSize_T leaves room for that round-up.
     */
    sliceBuffer = ALIGNED_PTR(pBuffer + sliceBufferSize * threadIdx); /* storage for temporary result from FilterSobelHorizontal */
    pBuffer     = sliceBuffer + intermediateBufferSize + 2 * lineBufferSize;               /* temporary calculations buffer                           */

    /* Row offsets are computed in 64 bits: rows * step exceeds the int range for images larger than 2 GiB */
    const Ipp8u *pSliceSrc        = pSrc + tx * tWidth + (Ipp64s)ty * tHeight * srcStep;
    Ipp16s* pSliceDst             = (Ipp16s*)((Ipp8u*)(pDst + tx * tWidth) + (Ipp64s)ty * tHeight * dstStep);
    Ipp16s* pSliceDstIntermediate = (Ipp16s*)(sliceBuffer);

    /* Add an ippBorderInMem* flag for every side of the slice that is adjacent to another slice */
    if ((splitImage.y > 1))
    {
        if (ty == 0) borderTrd = (IppiBorderType)((int)border | (int)ippBorderInMemBottom);
        else if (ty == (int)(splitImage.y - 1)) borderTrd = (IppiBorderType)((int)border | (int)ippBorderInMemTop);
        else  borderTrd = (IppiBorderType)((int)border | (int)ippBorderInMemBottom | (int)ippBorderInMemTop);
    }
    borderTrdW = borderTrd;
    if ((splitImage.x > 1))
    {
        if (tx == 0) borderTrdW = (IppiBorderType)((int)borderTrd | (int)ippBorderInMemRight);
        else if (tx == (int)(splitImage.x - 1)) borderTrdW = (IppiBorderType)((int)borderTrd | (int)ippBorderInMemLeft);
        else  borderTrdW = (IppiBorderType)((int)borderTrd | (int)ippBorderInMemRight | (int)ippBorderInMemLeft);
    }

    /* Intel IPP functions calls */
    /* Horizontal filter result goes to the intermediate slice image ... */
    status = ippiFilterSobelHorizBorder_8u16s_C1R(pSliceSrc, srcStep, pSliceDstIntermediate, dstStepIntermediate,
                                                  roiSize, maskId, borderTrdW, borderValue, pBuffer);
    if (status != ippStsNoErr) return status;

    /* ... vertical filter result goes straight to the destination image */
    status = ippiFilterSobelVertBorder_8u16s_C1R(pSliceSrc, srcStep, pSliceDst, dstStep,
                                                 roiSize, maskId, borderTrdW, borderValue, pBuffer);
    if (status != ippStsNoErr) return status;

    Ipp16s *pLineDst = 0;
    Ipp16s *pLineDstIntermediate = 0;
    Ipp32s *pLineDst_32s = (Ipp32s *)(sliceBuffer + intermediateBufferSize);
    Ipp32s *pLineDstIntermediate_32s = (Ipp32s *)(sliceBuffer + intermediateBufferSize + lineBufferSize);
    for (int i = 0; i < roiSize.height; ++i) {
        pLineDst = (Ipp16s *)((Ipp8u *)pSliceDst + (Ipp64s)i * dstStep);
        pLineDstIntermediate = (Ipp16s *)((Ipp8u *)pSliceDstIntermediate + i * dstStepIntermediate);

        /* Square the horizontal result of this line into a 32-bit line buffer */
        status = ippsMul_16s32s_Sfs(pLineDstIntermediate, pLineDstIntermediate, pLineDstIntermediate_32s, roiSize.width, 0);
        if (status != ippStsNoErr) return status;

        /* Square the vertical result of this line into the other 32-bit line buffer */
        status = ippsMul_16s32s_Sfs(pLineDst, pLineDst, pLineDst_32s, roiSize.width, 0);
        if (status != ippStsNoErr) return status;

        /* Sum of squares */
        status = ippsAdd_32s_Sfs(pLineDstIntermediate_32s, pLineDst_32s, pLineDst_32s, roiSize.width, 0);
        if (status != ippStsNoErr) return status;

        /* Square root of the sum replaces the vertical result in the destination line */
        status = ippsSqrt_32s16s_Sfs(pLineDst_32s, pLineDst, roiSize.width, 0);
        if (status != ippStsNoErr) return status;
    }

    return status;
}

/* /////////////////////////////////////////////////////////////////////////////
//  Name:       ippiFilterSobel_8u16s_C1R_T
//
//  Purpose:    Computes Filter Sobel using per slices function pipeline - the whole functions pipeline
//              runs for each slice of an image
//
//  Parameters:
//   pSrc           Pointer to the source image
//   srcStep        Source image step
//   pDst           Pointer to the destination image
//   dstStep        Destination image step
//   border         Type of the border
//   borderValue    Constant value used for border pixels if border type equals ippBorderConst
//   pSpec          Pointer to Filter Sobel spec structure
//   pBuffer        Pointer to a temporary buffer
//
//  Return Values:
//   ippStsNoErr            Indicates no error.
//   ippStsNullPtrErr       Indicates an error when pSrc, pDst, pSpec or pBuffer is NULL.
//   ippStsSizeErr          Indicates an error when roiSize is negative, or equal to zero.
//   ippStsNotEvenStepErr   Indicates an error when one of the step values is not divisible by 4
//                          for floating-point images, or by 2 for short-integer images.
//   ippStsBorderErr        Indicates an error when borderType has illegal value.
//   ippStsSqrtNegArg       Indicates that source image pixel has a negative value
//
*/
IPPFUN(IppStatus, ippiFilterSobel_8u16s_C1R_T, (const Ipp8u *pSrc, int srcStep, Ipp16s *pDst, int dstStep, IppiBorderType border,
                                                Ipp8u borderValue, IppiFilterSobelSpec_T *pSpec, Ipp8u* pBuffer))
{
    /* All four pointers are mandatory */
    if (pSrc == 0 || pDst == 0 || pSpec == 0 || pBuffer == 0)
        return ippStsNullPtrErr;

    /* The spec memory is accessed through an aligned SobelInfo pointer */
    SobelInfo *pSobel = (SobelInfo*)ALIGNED_PTR(pSpec);

    int threadCount = 1;
    ippGetNumThreads_T(&threadCount);

    /* One unit of work per slice of the split image */
    const int channels  = 1;
    const int tileCount = pSobel->splitImage.x * pSobel->splitImage.y;

    /* Pack the arguments and the spec contents into the structure handed to the slice kernel */
    ippiFilterSobel_T_Str task;
    filterSobelThreadingStructureEncode_8u16s((Ipp8u*)pSrc, srcStep, (Ipp16s*)pDst, dstStep, pSobel->roiSize, pSobel->maskId, pSobel->normType,
                                              border, borderValue, pBuffer, pSobel->sliceBufferSize, pSobel->intermediateBufferSize,
                                              pSobel->lineBufferSize, channels, pSobel->splitImage, pSobel->sliceSize, pSobel->lastSliceSize,
                                              &task);

    /* More than one thread: let the threading layer distribute the slices */
    if (threadCount != 1)
        return ippParallelFor_T(tileCount, (void*)&task, ippiFilterSobel_8u16s_C1R_T_Fun);

    /*
     * Single thread: run the slice kernel for every slice in order.
     * A failing slice does not stop the loop; the status of the last failing slice is returned.
     */
    IppStatus result = ippStsNoErr;
    int tile = 0;
    while (tile < tileCount)
    {
        IppStatus tileStatus = ippiFilterSobel_8u16s_C1R_T_Fun(tile, (void*)&task);
        if (tileStatus != ippStsNoErr)
            result = tileStatus;
        ++tile;
    }

    return result;
}
