/*******************************************************************************
* Copyright 2016-2020 Intel Corporation.
*
* This software and the related documents are Intel copyrighted  materials,  and
* your use of  them is  governed by the  express license  under which  they were
* provided to you (License).  Unless the License provides otherwise, you may not
* use, modify, copy, publish, distribute,  disclose or transmit this software or
* the related documents without Intel's prior written permission.
*
* This software and the related documents  are provided as  is,  with no express
* or implied  warranties,  other  than those  that are  expressly stated  in the
* License.
*******************************************************************************/

#if (defined _DEBUG && defined _WIN32)
#define _CRTDBG_MAP_ALLOC
#include <crtdbg.h>
#endif

#include <math.h>
#include <memory>

#include "iw_tiling_iw++.h"
#include "iw_tiling_no_pipe.h"
#include "iw_tiling_ref.h"

#include "base.h"
#include "base_image.h"
#include "base_iw.h"
#include "base_renderer.h"

// Prints the example banner followed by the version strings of the
// Intel(R) IPP Integration Wrappers and Intel(R) IPP libraries in use.
static void printVersion()
{
    // Static part of the banner; adjacent literals are concatenated by the compiler
    printf("\nIntel(R) IPP Integration Wrappers Example: Advanced Threading"
           "\nThis example demonstrates how scalability of tiling and threading depends on tile size and number of threads.\n"
           "\nBased on:");

    // Library versions, queried at run time
    printf("\nIntel(R) IPP Integration Wrappers: %s"
           "\nIntel(R) IPP: %s\n",
           ipp::IwVersion().getInfoString().c_str(),
           ipp::IppVersion().getInfoString().c_str());
}

// Prints the command line synopsis followed by the description of every
// option listed in pOptions. argv is used to obtain the program name.
static void printHelp(const cmd::OptDef pOptions[], char* argv[])
{
    // Synopsis line and the header of the options list
    printf("\nUsage: %s [-i] InputFile [[-o] OutputFile] [Options]\n"
           "Options:\n",
           GetProgName(argv));

    // Per-option usage text
    cmd::OptUsage(pOptions);
}

// Computes the Karp-Flatt metric (experimentally determined serial fraction)
//     e = (1/speedup - 1/threads) / (1 - 1/threads)
// for a measured speedup obtained on the given number of threads.
//
// speedup - measured speedup relative to the reference run
// threads - number of threads used for the measurement
//
// Returns 0 when the metric is not defined: for a single thread (or fewer),
// and for a speedup which is zero, negative or NaN (e.g. a missing
// measurement), instead of dividing by zero.
static double getKarpFlatt(double speedup, int threads)
{
    if(threads <= 1)
        return 0;

    // Written as a negated comparison so that NaN is rejected as well
    if(!(speedup > 0))
        return 0;

    const double invThreads = 1./threads;
    return (1/speedup - invThreads)/(1 - invThreads);
}

int main(int argc, char *argv[])
{
#ifdef _CRTDBG_MAP_ALLOC
    _CrtSetDbgFlag(_CRTDBG_LEAK_CHECK_DF | _CrtSetDbgFlag(_CRTDBG_REPORT_FLAG));
#endif

    /*
    // Variables initialization
    */
    Status       status         = STS_OK;
    DString      sInputFile     = CheckTestDirs( BMP_RGB_FILE );
    DString      sOutputFile;
    DString      sCSV;
    DString      sIppCpu;

    int           threadsMin     = 0;
    int           threadsMax     = 0;
    int           threadsStep    = 2;
    int           threads        = -1;
    int           tileSizeP[2]   = {-1, -1};
    int           tileSizeMin[2] = {32, 8};
    double        tileDivider[2] = {2, 2};
    bool          reinit         = false;
    bool          bNoWindow      = true;
    bool          bPrintHelp     = false;
    File          csv;

    // Use these switches for profiling (e.g. Intel(R) VTune)
    bool          allowRef       = true;
    bool          allowPipe      = true;
    bool          allowNP        = true;

    TilingRef    tilingRef;
    TilingIW     tilingIW(PARALLEL_ANY);
    TilingNoPipe tilingNP(PARALLEL_ANY);

    Image srcData;
    Image dstData;
    Image dstDataNP;
    Image dstRef;

    // General timing
    vm_tick      tickStart   = 0;
    vm_tick      tickAcc     = 0;
    vm_tick      tickFreq    = vm_time_get_frequency()/1000;
    double       timeP       = 0;
    double       timeNP      = 0;
    double       timeRef     = 0;
    unsigned int loops       = 0;
    unsigned int loopsLimit  = 100;

    /*
    // Cmd parsing
    */
    const cmd::OptDef cmdOpts[] = {
        { 'i', "",              1, cmd::KT_DSTRING,   cmd::KF_OPTIONAL,  &sInputFile,       "Input file name" },
        { 'o', "",              1, cmd::KT_DSTRING,   cmd::KF_OPTIONAL,  &sOutputFile,      "Output file name" },
        { 't', "threads",       1, cmd::KT_INTEGER,   0,                 &threads,          "Manual number of threads (threads iterations will be ignored)" },
        {   0, "threads-min",   1, cmd::KT_POSITIVE,  0,                 &threadsMin,       "Start number of threads range" },
        {   0, "threads-max",   1, cmd::KT_POSITIVE,  0,                 &threadsMax,       "End number of threads range" },
        {   0, "threads-step",  1, cmd::KT_POSITIVE,  0,                 &threadsStep,      "Step between start and end of the threads range" },
        { 'b', "tile",          2, cmd::KT_INTEGER,   0,                 &tileSizeP,        "Manual tile size (tile iterations will be ignored)"},
        {   0, "tile-min",      2, cmd::KT_INTEGER,   0,                 &tileSizeMin,      "Minimal tile size for tiling iterations"},
        {   0, "tile-divider",  2, cmd::KT_DOUBLE,    0,                 &tileDivider,      "Tile divider for tiling iterations. 2 by default"},
        { 'r', "",              1, cmd::KT_BOOL,      0,                 &reinit,           "Re-initialize data for each loop and include initialization time in timings" },
        {   0, "csv",           1, cmd::KT_DSTRING,   0,                 &sCSV,             "CSV file name to save performance data" },
        { 'l', "",              1, cmd::KT_POSITIVE,  0,                 &loopsLimit,       "Number of loops per iteration" },
        { 'T', "",              1, cmd::KT_DSTRING,   0,                 &sIppCpu,          "Target Intel IPP optimization (" IPP_OPT_LIST ")" },
        { 'h', "help",          1, cmd::KT_BOOL,      0,                 &bPrintHelp,       "Print help and exit" },
        {0}
    };

    if(cmd::OptParse(argc, argv, cmdOpts))
    {
        printHelp(cmdOpts, argv);
        PRINT_MESSAGE("invalid input parameters");
        return 1;
    }

    InitPreferredCpu(sIppCpu.c_str());

    printVersion();

    // Check default image availability
    if ( !strcmp(sInputFile.c_str(), BMP_GRAYSCALE_FILE) ) {
        bPrintHelp = ( -1 == vm_file_access(sInputFile.c_str(), 0) );
    }

    if(bPrintHelp)
    {
        printHelp(cmdOpts, argv);
        return 0;
    }

    if(!sInputFile.Size())
    {
        printHelp(cmdOpts, argv);
        PRINT_MESSAGE("Cannot open input file");
        return 1;
    }

    for(;;)
    {
        Size tileSizeInit(tileSizeP[0], tileSizeP[1]);

        if(sCSV.Size())
        {
            status = csv.Open(sCSV.c_str(), "w");
            CHECK_STATUS_PRINT_BR(status, "File::Open()", GetBaseStatusString(status));
        }

        // Read from file
        printf("\nInput file: %s\n", sInputFile.c_str());
        status = srcData.Read(sInputFile, CF_RGB, ST_8U);
        CHECK_STATUS_PRINT_BR(status, "Image::Read()", GetBaseStatusString(status));

        status = TilingBase::InitExternal(srcData, dstData);
        CHECK_STATUS_PRINT_BR(status, "TilingRef::InitExternal()", GetBaseStatusString(status));

        printf("Input info: %dx%d %s\n", (int)srcData.m_size.width, (int)srcData.m_size.height, colorFormatName[srcData.m_color]);

        printf("\nOutput file: %s\n", (sOutputFile.Size())?sOutputFile.c_str():"-");
        printf("Output info: %dx%d %s\n\n", (int)dstData.m_size.width, (int)dstData.m_size.height, colorFormatName[dstData.m_color]);

        status = tilingNP.Init(srcData, dstDataNP);
        CHECK_STATUS_PRINT_BR(status, "TilingNoPipe::Init()", GetBaseStatusString(status));
        if(reinit)
            tilingNP.Release();

        status = tilingRef.Init(srcData, dstRef);
        CHECK_STATUS_PRINT_BR(status, "TilingRef::Init()", GetBaseStatusString(status));
        if(reinit)
            tilingRef.Release();

        if(threads >= 0)
        {
            tilingNP.ThreadsSetNum(threads);
            tilingIW.ThreadsSetNum(threads);
            threads = tilingIW.ThreadsGetNum();
            threadsMin = threads;
            threadsMax = threads;
        }
        else
        {
            if(threadsMax == 0)
            {
                tilingIW.ThreadsSetNum(0);
                threadsMax = tilingIW.ThreadsGetNum();
            }
            if(threadsMin > threadsMax)
                threadsMin = threadsMax;
            if(threadsStep < 1)
                threadsStep = 1;
        }

        Size tileMin(tileSizeMin[0], tileSizeMin[1]);
        if(tileDivider[0] < 1)
            tileDivider[0] = 1;
        if(tileDivider[1] < 1)
            tileDivider[1] = 1;
        if(tileMin.width == 0)
            tileDivider[0] = 1;
        if(tileMin.height == 0)
            tileDivider[1] = 1;

        printf("Threading:        %s\n",          ParallelToString(tilingIW.m_parallelType));
        printf("Threads Min:      %d\n",          threadsMin);
        printf("Threads Max:      %d\n",          threadsMax);
        printf("Threads Step:     %d\n",          threadsStep);
        printf("Tile Min:         %dx%d\n",       tileSizeMin[0], tileSizeMin[1]);
        printf("Tile Divider      %0.1fx%0.1f\n", tileDivider[0],  tileDivider[1]);
        printf("Loops:            %d\n",          (int)loopsLimit);

        if(allowRef)
        {
            printf("\nMeasuring reference...\n");
            for(loops = 1, tickAcc = 0;; loops++)
            {
                tickStart = vm_time_get_tick();
                if(reinit)
                    tilingRef.Init(srcData, dstRef);
                status = tilingRef.Run(srcData, dstRef);
                tickAcc += (vm_time_get_tick() - tickStart);
                CHECK_STATUS_PRINT_BR(status, "TilingRef::Run()", GetBaseStatusString(status));

                timeRef = (double)tickAcc/tickFreq;
                if(loopsLimit)
                {
                    if(loops >= loopsLimit)
                        break;
                }
            }
            if(status < 0) break;
            timeRef /= loops;
        }

        printf("\nThreads | Tile Width | Tile Height | Tile Mem. (KB) | Ref. (ms) | Pipe (ms) | R/P   | NPipe (ms) | R/NP  | NP/P\n");
        csv.Print("Threads;Tile Width;Tile Height;Tile Footprint (KB);Tile Overhead;Reference (ms);Pipe (ms);Non-Pipe (ms);Pipe Karp-Flatt;Non-Pipe Karp-Flatt;\n");

        if(threadsStep == 1)
            threadsMin = 1;
        int threadsIters = (threadsMax-threadsMin)/threadsStep;
        for(int threadIdx = 0; threadIdx <= threadsIters; threadIdx++)
        {
            Size tileSize;
            threads = threadsStep*threadIdx+threadsMin;
            if(threads == 0)
                threads = 1;

            tilingNP.ThreadsSetNum(threads);
            tilingIW.ThreadsSetNum(threads);

            bool manH = (tileSizeInit.height >= 0);
            bool manW = (tileSizeInit.width >= 0);

            if(tileSizeInit.height == 0)
                tileSize.height = dstData.m_size.height;
            else if(tileSizeInit.height < 0)
                tileSize.height = dstData.m_size.height/threads;
            else
                tileSize.height = tileSizeInit.height;

            int  footprint;
            Size minTile;
            for(;; tileSize.height = (long long)(tileSize.height/tileDivider[1]))
            {
                if(tileSize.height < 1)
                    break;
                if(tileSizeInit.width <= 0)
                    tileSize.width = dstData.m_size.width;
                else
                    tileSize.width = tileSizeInit.width;

                if(tileMin.height > 0 && tileSize.height < tileMin.height)
                    break;
                else
                {
                    footprint = TilingBase::GetTileFootprint(tileSize, dstData.m_size);
                    if(footprint < 32 && !manH)
                        break;
                }

                for(;; tileSize.width = (long long)(tileSize.width/tileDivider[0]))
                {
                    if(tileSize.width < 1)
                        break;

                    if(tileMin.width > 0 && tileSize.width < tileMin.width)
                        break;
                    else
                    {
                        footprint = TilingBase::GetTileFootprint(tileSize, dstData.m_size);
                        if(footprint < 32 && !manW)
                            break;
                    }

                    if(allowPipe)
                    {
                        status = tilingIW.Init(srcData, dstData, tileSize);
                        CHECK_STATUS_PRINT_BR(status, "TilingIW::Init()", GetBaseStatusString(status));
                        if(reinit)
                            tilingIW.Release();
                    }

                    minTile = tilingIW.GetMinTile();
                    if(minTile.width > tileSize.width)
                        break;
                    if(minTile.height > tileSize.height)
                        break;

                    printf("%-7d | %-10d | %-11d | %-14d | %-9.3f |", threads, (int)tileSize.width, (int)tileSize.height, TilingBase::GetTileFootprint(tileSize, dstData.m_size), timeRef);

                    if(allowPipe)
                    {
                        for(loops = 1, tickAcc = 0;; loops++)
                        {
                            tickStart = vm_time_get_tick();
                            if(reinit)
                                tilingIW.Init(srcData, dstData, tileSize);
                            status = tilingIW.RunParallel(srcData, dstData, tileSize);
                            tickAcc += (vm_time_get_tick() - tickStart);
                            CHECK_STATUS_PRINT_BR(status, "TilingIW::Run()", GetBaseStatusString(status));

                            timeP = (double)tickAcc/tickFreq;
                            if(loopsLimit)
                            {
                                if(loops >= loopsLimit)
                                    break;
                            }
                        }
                        if(status < 0) break;
                        timeP /= loops;
                    }
                    printf(" %-9.3f | %-5.1f |", timeP, (timeP)?timeRef/timeP:0);

                    if(allowNP)
                    {
                        for(loops = 1, tickAcc = 0;; loops++)
                        {
                            tickStart = vm_time_get_tick();
                            if(reinit)
                                tilingNP.Init(srcData, dstDataNP);
                            status = tilingNP.RunParallel(srcData, dstDataNP, tileSize);
                            tickAcc += (vm_time_get_tick() - tickStart);
                            CHECK_STATUS_PRINT_BR(status, "TilingNoPipe::Run()", GetBaseStatusString(status));

                            timeNP = (double)tickAcc/tickFreq;
                            if(loopsLimit)
                            {
                                if(loops >= loopsLimit)
                                    break;
                            }
                        }
                        if(status < 0) break;
                        timeNP /= loops;
                    }
                    printf(" %-10.3f | %-5.1f | %-5.1f\n", timeNP, (timeNP)?timeRef/timeNP:0, (timeP)?timeNP/timeP:0);
                    if(csv.IsOpened())
                    {
                        csv.Print("%d;%d;%d;%d;%.3f;%.6f;%.6f;%.6f;%.3f;%.3f\n", threads, (int)tileSize.width, (int)tileSize.height, TilingBase::GetTileFootprint(tileSize, dstData.m_size),
                            TilingBase::GetTileOverhead(tileSize, dstData.m_size), timeRef, timeP, timeNP, getKarpFlatt(timeRef/timeP, threads), getKarpFlatt(timeRef/timeNP, threads));
                    }
                    if(manW || (tileSize.width == tileSize.width/tileDivider[0]))
                        break;

                }
                if(manH || (tileSize.height == tileSize.height/tileDivider[1]))
                    break;
            }
        }

        std::vector<double> diff;
        if(allowPipe)
        {
            diff = dstData.FindMaxDiff(dstRef);
            if(diff.size())
                printf("\nMax diff IW:   %.4f\n", diff[0]);
        }
        if(allowNP)
        {
            diff = dstDataNP.FindMaxDiff(dstRef);
            if(diff.size())
                printf("Max diff NP:   %.4f\n", diff[0]);
        }

        if(sOutputFile.Size())
        {
            status = dstData.Write(sOutputFile.c_str());
            CHECK_STATUS_PRINT_BR(status, "Image::Write()", GetBaseStatusString(status));
        }

        // Output rendering
        if(!bNoWindow)
        {
            WindowDraw draw("Intel(R) IPP IW Pipeline Threading Example");
            if(draw.IsInitialized())
            {
                printf("\nPress Space to cycle through stages:\n");
                printf("1 - result image\n");
                printf("2 - original image\n");
                printf("\nClose window to exit.\n");

                dstData.ConvertSamples(ST_8U);

                int  iIndex  = 0;
                bool bRedraw = true;
                while(!draw.IsClosed())
                {
                    vm_time_sleep(10);
                    if(draw.CheckKey() == KK_SPACE)
                    {
                        iIndex = (iIndex+1)%4;
                        bRedraw = true;
                    }
                    if(draw.IsInvalidated())
                        bRedraw = true;

                    if(bRedraw)
                    {
                        if(iIndex == 0)
                            draw.DrawImage(&dstData);
                        else if(iIndex == 1)
                            draw.DrawImage(&dstDataNP);
                        else if(iIndex == 2)
                            draw.DrawImage(&dstRef);
                        else if(iIndex == 3)
                            draw.DrawImage(&srcData);
                        bRedraw = false;
                    }
                }
            }
        }

        break;
    }

    if(status < 0)
        return status;
    return 0;
}
