/*******************************************************************************
* Copyright 2018-2020 Intel Corporation.
*
* This software and the related documents are Intel copyrighted  materials,  and
* your use of  them is  governed by the  express license  under which  they were
* provided to you (License).  Unless the License provides otherwise, you may not
* use, modify, copy, publish, distribute,  disclose or transmit this software or
* the related documents without Intel's prior written permission.
*
* This software and the related documents  are provided as  is,  with no express
* or implied  warranties,  other  than those  that are  expressly stated  in the
* License.
*******************************************************************************/

#include <math.h>
#include <memory>

#include "base.h"
#include "base_image.h"
#include "base_renderer.h"
#include "base_ipp.h"

#include "ippcore_tl.h"
#include "ippi_tl.h"
#include "ipps.h"
#include "ippcore_tl.h"

#if defined USE_OMP
#include <omp.h>
#elif defined USE_TBB
#define TBB_PREVIEW_GLOBAL_CONTROL 1
#include "tbb/global_control.h"
#endif

static void printVersion()
{
    const IppLibraryVersion *pVer = ippsGetLibVersion();

    printf("\nIntel(R) IPP Threading Layers Example: Sobel Filter (function level parallelism)");
    printf("\nIt demonstrates function level parallelism approach implemented for Sobel edge-detector filter pipeline.");
    printf("\nIt consequently runs functions of Sobel filter pipeline, each threaded using Intel IPP Threading Layer.\n");
    printf("\nBased on:");

    IppThreadingType thrType;
    ippGetThreadingType_LT (&thrType);
    printf("\nIntel(R) IPP Threading Layer (%s)", (thrType==OMP)? "OpenMP" : "TBB");

    printf("\nIntel(R) IPP: %s %s %s", pVer->Name, pVer->Version, pVer->BuildDate);
    printf("\n");
}

/*
// Prints the usage line followed by the description of every option.
//   pOptions - option table handed to cmd::OptUsage()
//   argv     - program arguments; only used to obtain the program name
*/
static void printHelp(const cmd::OptDef pOptions[], char* argv[])
{
    const char *pProgName = GetProgName(argv);

    printf("\nUsage: %s [-i] InputFile [[-o] OutputFile] [Options]\n", pProgName);
    printf("Options:\n");

    cmd::OptUsage(pOptions);
}

int main(int argc, char *argv[])
{
    /*
    // Variables initialization
    */
    Status       status          = STS_OK;
    DString      sInputFile      = CheckTestDirs( BMP_GRAYSCALE_FILE );
    DString      sOutputFile;
    DString      sIppCpu;

    unsigned int  threads        = 0;
    bool          bPrintHelp     = false;

    Image srcData;
    Image dstData;
	Image intermediateDst;

    // General timing
    vm_tick      tickStart   = 0;
    vm_tick      tickAcc     = 0;
    vm_tick      tickFreq    = vm_time_get_frequency()/1000;
    double       fTime       = 0;
    double       fTimeLimit  = 0;
    unsigned int iLoops      = 0;
    unsigned int iLoopsLimit = 0;

    /*
    // Cmd parsing
    */
    const cmd::OptDef cmdOpts[] =
    {
        { 'i', "", 1, cmd::KT_DSTRING,   cmd::KF_OPTIONAL,  &sInputFile,      "input file name" },
        { 'o', "", 1, cmd::KT_DSTRING,   cmd::KF_OPTIONAL,  &sOutputFile,     "output file name" },
#if defined USE_OMP
        { 't', "", 1, cmd::KT_INTEGER,   0,                 &threads,         "number of threads for Threading Layer interface (OpenMP)" },
#elif defined USE_TBB
        { 't', "", 1, cmd::KT_INTEGER,   0,                 &threads,         "number of tasks for Threading Layer interface (TBB task_scheduler_init)" },
#endif
        { 'w', "", 1, cmd::KT_DOUBLE,    0,                 &fTimeLimit,      "minimum test time in milliseconds" },
        { 'l', "", 1, cmd::KT_POSITIVE,  0,                 &iLoopsLimit,     "number of loops (overrides test time)" },
        { 'T', "", 1, cmd::KT_DSTRING,   0,                 &sIppCpu,         "target Intel IPP optimization (" IPP_OPT_LIST ")" },
        { 'h', "", 1, cmd::KT_BOOL,      0,                 &bPrintHelp,      "print help and exit" },
        {0}
    };

    if (cmd::OptParse(argc, argv, cmdOpts))
    {
        printHelp(cmdOpts, argv);
        PRINT_MESSAGE("Invalid input parameters");
        return 1;
    }

    InitPreferredCpu(sIppCpu.c_str());

    // Check default image availability
    if ( !strcmp(sInputFile.c_str(), BMP_GRAYSCALE_FILE) ) {
        bPrintHelp = ( -1 == vm_file_access(sInputFile.c_str(), 0) );
    }

    if(bPrintHelp)
    {
        printHelp(cmdOpts, argv);
        return 0;
    }

    if(!sInputFile.Size())
    {
        printHelp(cmdOpts, argv);
        PRINT_MESSAGE("Cannot open input file");
        return 1;
    }

    printVersion();

    for(;;)
    {
        IppStatus       ippStatus;
        IppiSize        ippSrcSize;
        IppDataType     ippSrcType;
        IppDataType     ippDstType;
        IppiMaskSize    sobelMaskId = ippMskSize3x3;
        IppiBorderType  borderType  = (IppiBorderType)(ippBorderConst);
        Ipp8u           borderValue = 0;
        IppNormType     normType    = ippNormL2;

        AutoBuffer<Ipp8u>  ippFSobelBuffer;
        AutoBuffer<Ipp16s> ippFSobelIntermediateBuffer16s;
		AutoBuffer<Ipp32s> ippFSobelIntermediateBuffer32s;

        // Read from file
        printf("\nInput file: %s\n", sInputFile.c_str());
        status = srcData.Read(sInputFile, CF_GRAY, ST_8U);
        CHECK_STATUS_PRINT_BR(status, "Image::Read()", GetBaseStatusString(status));
        printf("Input info: %dx%d %s\n", (int)srcData.m_size.width, (int)srcData.m_size.height, colorFormatName[srcData.m_color]);

        if (!threads) threads = 1;

#if defined USE_OMP
        omp_set_num_threads (threads);
#elif defined USE_TBB
		tbb::global_control set_num_threads(tbb::global_control::max_allowed_parallelism, threads); // set_num_threads(threads)
#endif
        status = dstData.Alloc(srcData.m_size, srcData.m_color, ST_16S);
        CHECK_STATUS_PRINT_BR(status, "dstData.Alloc", GetBaseStatusString(status));

		status = intermediateDst.Alloc(srcData.m_size, srcData.m_color, ST_32S);
		CHECK_STATUS_PRINT_BR(status, "intermediateDst.Alloc", GetBaseStatusString(status));

        printf("\nOutput file: %s\n", (sOutputFile.Size())?sOutputFile.c_str():"-");
        printf("Output info: %dx%d %s\n\n", (int)dstData.m_size.width, (int)dstData.m_size.height, colorFormatName[dstData.m_color]);

        ippSrcSize = ImageSizeToIppOld(srcData.m_size);
        ippSrcType = ImageFormatToIpp(srcData.m_sampleFormat);
        ippDstType = ImageFormatToIpp(dstData.m_sampleFormat);

        int bufferSizeH = 0;
        int bufferSizeV = 0;

        // Get buffer sizes for Filter Sobel Horizontal and Filter Sobel Vertical
        ippStatus = ippiFilterSobelHorizBorderGetBufferSize_T(ippSrcSize, sobelMaskId, ippSrcType, ippDstType, 1 /* numChannels */, &bufferSizeH);
        CHECK_STATUS_PRINT_BR(ippStatus, "ippiFilterSobelHorizBorderGetBufferSize_T()", ippGetStatusString(ippStatus));

        ippStatus = ippiFilterSobelVertBorderGetBufferSize_T(ippSrcSize, sobelMaskId, ippSrcType, ippDstType, 1 /* numChannels */, &bufferSizeV);
        CHECK_STATUS_PRINT_BR(ippStatus, "ippiFilterSobelVertBorderGetBufferSize_T()", ippGetStatusString(ippStatus));
        // Allocate common buffer for temporary calculations
        ippFSobelBuffer.Alloc(bufferSizeH > bufferSizeV ? bufferSizeH : bufferSizeV);

        // Allocate buffers to store intermediate results of calculations in the pipeline processing
        ippFSobelIntermediateBuffer16s.Alloc(ippSrcSize.width * ippSrcSize.height);
		ippFSobelIntermediateBuffer32s.Alloc(ippSrcSize.width * ippSrcSize.height);

        printf("API:       Threading Layer\n");
        printf("Type:      Pipeline per Image\n");
        printf("Threads:   %u\n",    threads);

        for(iLoops = 1, tickAcc = 0;; iLoops++)
        {
            tickStart = vm_time_get_tick();

            // Start of the pipeline

            // 1. Sobel filter Horizontal (Threading Layer function applied to the whole image)
            ippStatus = ippiFilterSobelHorizBorder_8u16s_C1R_T((const Ipp8u*)srcData.ptr(), srcData.m_step, ippFSobelIntermediateBuffer16s, dstData.m_step,
                                                               ippSrcSize, sobelMaskId, borderType, borderValue, ippFSobelBuffer);
            CHECK_STATUS_PRINT_BR(ippStatus, "ippiFilterSobelHorizBorder_8u16s_C1R_T()", ippGetStatusString(ippStatus));

            // 2. Sobel filter Vertical (Threading Layer function applied to the whole image)
            ippStatus = ippiFilterSobelVertBorder_8u16s_C1R_T((const Ipp8u*)srcData.ptr(), srcData.m_step, (Ipp16s*)dstData.ptr(), dstData.m_step,
                                                              ippSrcSize, sobelMaskId, borderType, borderValue, ippFSobelBuffer);
            CHECK_STATUS_PRINT_BR(ippStatus, "ippiFilterSobelVertBorder_8u16s_C1R_T()", ippGetStatusString(ippStatus));

            // 3. Sqr of Sobel filter Horizontal values (Threading Layer function applied to the whole image)	
			ippStatus = ippiSqr_16s32s_C1RSfs_T(ippFSobelIntermediateBuffer16s, dstData.m_step, ippFSobelIntermediateBuffer32s, intermediateDst.m_step, ippSrcSize, 0 /* scale */);
            CHECK_STATUS_PRINT_BR(ippStatus, "ippiSqr_16s_C1IRSfs_T()", ippGetStatusString(ippStatus));

            // 4. Sqr of Sobel filter Vertical values (Threading Layer function applied to the whole image)
			ippStatus = ippiSqr_16s32s_C1RSfs_T((Ipp16s*)dstData.ptr(), dstData.m_step, (Ipp32s*)intermediateDst.ptr(), intermediateDst.m_step, ippSrcSize, 0 /* scale */);
            CHECK_STATUS_PRINT_BR(ippStatus, "ippiSqr_16s_C1IRSfs_T()", ippGetStatusString(ippStatus));

            // 5. Addition of calculations from item 3. and 4. (Threading Layer function applied to the whole image)
			ippStatus = ippiAdd_32s_C1IRSfs_T(ippFSobelIntermediateBuffer32s, intermediateDst.m_step, (Ipp32s*)intermediateDst.ptr(), intermediateDst.m_step, ippSrcSize, 0 /* scale */);
            CHECK_STATUS_PRINT_BR(ippStatus, "ippiAdd_16s_C1IRSfs_T()", ippGetStatusString(ippStatus));

            // 6. Square root of calculations from item 5. (Threading Layer function applied to the whole image)
			ippStatus = ippiSqrt_32s16s_C1RSfs_T((Ipp32s*)intermediateDst.ptr(), intermediateDst.m_step, (Ipp16s*)dstData.ptr(), dstData.m_step, ippSrcSize, 0 /* scale */);
            CHECK_STATUS_PRINT_BR(ippStatus, "ippiSqrt_16s_C1IRSfs_T()", ippGetStatusString(ippStatus));

            // End of the pipeline

            tickAcc += (vm_time_get_tick() - tickStart);
            fTime = (double)tickAcc/tickFreq;
            if(iLoopsLimit)
            {
                if(iLoops >= iLoopsLimit)
                    break;
            }
            else
            {
                if(fTime >= fTimeLimit)
                    break;
            }
        }
        if (status < 0) break;

        /*
        // Results output
        */
        printf("\nLoops:      %u\n", iLoops);
        printf("Time total: %0.3fms\n", fTime);
        printf("Loop avg:   %0.3fms\n", fTime/iLoops);

        if(sOutputFile.Size())
        {
            Image dstData_8u;
            status = dstData_8u.Alloc(dstData.m_size, dstData.m_color, ST_8U);

            Ipp16s* s = (Ipp16s*)dstData.ptr();
            Ipp8u*  d = (Ipp8u*) dstData_8u.ptr();
            for(int i = 0; i < dstData.m_size.width * dstData.m_size.height; i++)
            {
                d[i] = (s[i] < 0 ? 0 : (s[i] > 255 ? 255 : (Ipp8u)s[i]));
            }
            status = dstData_8u.Write(sOutputFile.c_str());
            CHECK_STATUS_PRINT_BR(status, "Image::Write()", GetBaseStatusString(status));
        }

        break;
    }

    if(status < 0)
        return status;

    return 0;
}
