
# INTEL CONFIDENTIAL
# Copyright 2014 2018 Intel Corporation
#
# The source code  contained or  described herein and  all documents related to
# the source code  ("Material") are owned by Intel Corporation or its suppliers
# or licensors.  Title to the  Material  remains with  Intel Corporation or its
# suppliers  and licensors. The Material contains trade secrets and proprietary
# and  confidential  information  of  Intel  or  its  suppliers  and  licensors.
# The Material  is protected  by worldwide  copyright and trade secret laws and
# treaty provisions.  No part of the Material  may  be used, copied, reproduced,
# modified, published, uploaded, posted, transmitted, distributed, or disclosed
# in any way without Intel's prior express written permission. No license under
# any  patent,  copyright, trade secret or other intellectual property right is
# granted  to  or conferred upon you by disclosure or delivery of the Materials,
# either expressly, by implication, inducement, estoppel or otherwise.
# Any license under such intellectual property rights must be express and
# approved by Intel in writing.



from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import zipfile, tarfile
import hashlib
import shutil
import filelock
import pathlib
from ._py2to3 import *
from .. import settings
from datetime import datetime, timezone
from collections import OrderedDict

from ..logging import getLogger


### This fake lock is used if file locks have been turned off
class _DummyLock(filelock.BaseFileLock):
    """Stand-in lock used instead of ``filelock.FileLock`` when file locks are disabled.

    Acquiring and releasing only toggle the ``_lock_file_fd`` attribute;
    this class itself never opens or creates a lock file.
    """
    def _acquire(self):
        # Placeholder value; presumably BaseFileLock treats a non-None
        # _lock_file_fd as "lock is held" -- TODO confirm against the
        # installed filelock version.
        self._lock_file_fd = 1

    def _release(self):
        # Reset the placeholder; presumably this marks the lock as released
        # for BaseFileLock -- same caveat as in _acquire.
        self._lock_file_fd = None

_LOG = getLogger()

# decides whether a compressed file still needs decompressing and, if so, does it under a file lock
class UnzipCheck(object):
    """
    Take a path to a compressed file and determine whether
    we need to decompress it or if we already have the decompressed files.

    An ``<archive>.md5`` file is written next to the archive after every
    successful decompression. A later call decompresses again only when that
    checksum no longer matches the archive or an expected file is missing.

    Args:
        filename (str) : filepath ending with a supported extension (see ``extensions``)
        expected_files (list) : list of files (relative to the directory of
            the archive) that must exist after decompression
        single (bool) : True/(False) - whether archive contains single file

    Raises:
        Exception : if ``single`` is True and ``filename`` does not end with
            a known compression extension

    expected_files :
        - if filename is a ZIP and no expected_files are specified, then
          the zip will be checked for whether the files exist.
        - For .tar.bz2, opening the bz2 to determine the expected files takes
          too long, so expected_files will not be checked unless passed in.

    single :
        - this is used to help with .bz2 (or non-zips) when the file to check
          for existence has the same name as the archive, minus the extension.
          the expected_files is filled in as roughly: *filename - archive extension*
    """
    # extension name, function name mapping
    # needs to be ordered...must check longer extensions before
    # the shorter ones
    extensions = OrderedDict([
        ('.zip', "_unzip"),
        ('.tar.bz2', "_unbz2"),
        ('.tar.xz', "_unlzma"),  # MUST be before .xz
        ('.xz', "_unlzma"),
    ])

    #: internal variable to track name of checksum file we should write
    checksum_file = None
    #: list of files that we will check for their existence, so that we
    #: know we are missing a file and need to redo the decompression
    expected_files = None
    #: internal variable to cache checksum once we have computed it
    _checksum = None
    #: internal variable to track filename
    _filename = None

    def __init__(self, filename, expected_files=None, single=False):
        # keep directory and name apart: everything is extracted relative to
        # the directory that holds the archive
        self._startdir, self._filename = os.path.split(
            os.path.abspath(filename)
        )
        self.checksum_file = self.filepath + ".md5"
        self.expected_files = expected_files
        if single:
            # the single expected output is the archive name minus its extension
            for ext in self.extensions:
                if self.filename.endswith(ext):
                    self.expected_files = [self.filename[:-len(ext)]]
                    break
            else:
                raise Exception("{0} does not end with known compression extension"
                                .format(self.filename))
        self._checksum = None

    @classmethod
    def supports(cls, filename):
        """returns True/False on whether we support the given compressed filename"""
        return filename.endswith(tuple(cls.extensions))

    @property
    def filename(self):
        """file that we are checking for compression"""
        return self._filename

    @property
    def filepath(self):
        """absolute path of the compressed file"""
        return os.path.join(self._startdir, self._filename)

    def decompress(self, **kwargs):
        """
        Decompress the archive if that is needed (or forced).

        Args:
            silent : (optional) whether to suppress the message about uncompression
            always : (optional) override to just always decompress, and not
                     check criteria

        Returns:
            list of files decompressed; when nothing had to be done this is
            the list of expected files (see ``file_list``)

        Raises:
            Exception : filename does not end with a known compression extension
            ValueError : ``always`` was requested but the location is read-only
            RuntimeError : the location is read-only and the files are missing
                or out of date, or the lock could not be acquired in time
        """
        silent = kwargs.pop("silent", False)
        always = kwargs.pop("always", False)
        assert len(kwargs) == 0, "Unexpected kwargs for decompress"

        # first matching extension wins (longer extensions are listed first)
        for ext, funcname in self.extensions.items():
            if self.filename.endswith(ext):
                break
        else:  # no matching extension found
            raise Exception("{0} does not end with known compression extension"
                            .format(self.filename))

        # result used whenever no extraction turns out to be necessary
        files = self.file_list
        filelock_path = self.filepath + ".lock"
        # make sure path is writable
        writeable = os.access(os.path.dirname(filelock_path), os.W_OK)

        if not writeable and settings.NO_FILELOCKS is False:
            # read-only location: we can only verify, never extract
            if always:
                raise ValueError("always specified, but location to extract to is read-only")
            if not self.check_files_exist():
                raise RuntimeError("cannot uncompress because location is read-only -- and files are missing:\n\t%s"%self.filename)
            if not self.check_checksum():
                raise RuntimeError("cannot uncompress because location is read-only -- and but checksum says files are out of date.\n\t%s"%self.filename)
            return files

        if settings.NO_FILELOCKS is False:
            lock = filelock.FileLock(filelock_path)
        else:
            lock = _DummyLock(filelock_path)
        # the checks are (re)done with the lock held to make sure only one
        # process ends up doing the uncompress
        try:
            with lock.acquire(settings.UNZIP_LOCK_TIMEOUT):
                if (always or
                        not self.check_checksum() or
                        not self.check_files_exist()):
                    files = self._extract_with_progress(funcname, silent)
                try:
                    # best effort clean-up of the lock file
                    os.remove(filelock_path)
                except OSError:
                    pass
        except filelock.Timeout:
            msg = ("Unzip hit timeout acquiring lock. You may need to delete this file "
                   "and re-run:\n\t %s"%filelock_path)
            _LOG.error("\n"+msg)
            raise RuntimeError(msg)
        return files

    def _extract_with_progress(self, funcname, silent):
        """Run the extractor method named ``funcname``, store the checksum and report timing."""
        # imported here so the progress output does not rely on ``sys`` being
        # re-exported by a star import
        import sys

        time_before = datetime.now()
        msg = "Uncompressing {0:<20}...".format(self.filename)
        _LOG.info(msg)
        if not silent:
            # make sure this goes out to user...
            sys.stdout.write(msg)
            sys.stdout.flush()
        try:
            files = getattr(self, funcname)()
        except IOError:
            # the original exception is propagated untouched; the hint only
            # goes to the log
            _LOG.error("\n\tThis occurred during decompression, make sure you do "
                       "not have another python process open, or the file is not "
                       "already open by this process")
            raise
        self.write_checksum()
        if not silent:
            seconds = (datetime.now() - time_before).total_seconds()
            minutes = int(seconds / 60)
            remaining_seconds = round(seconds % 60, 2)
            sys.stdout.write(".. done in {0:<2} minute(s) and {1:<5} second(s)\n".format(minutes, remaining_seconds))
        return files

    @property
    def checksum(self):
        """return the md5 checksum for the file (computed once, then cached)"""
        if self._checksum is None:
            digest = hashlib.md5()
            with open(self.filepath, "rb") as file_obj:
                # hash in chunks so large archives are never fully in memory
                for chunk in iter(lambda: file_obj.read(1024 * 1024), b""):
                    digest.update(chunk)
            self._checksum = digest.hexdigest()
        return self._checksum

    def check_checksum(self):
        """Checks for checksum file and reports whether checksum file matches checksum of existing file

        Returns:
            True/False : False = unzip needed (checksum file missing,
            unreadable, or different from the checksum of the archive)

        """
        if not os.path.exists(self.checksum_file):
            _LOG.debug("UnzipCheck: checksum file missing for: {0}".format(self.filename))
            return False
        try:
            with open(self.checksum_file) as file_obj:
                # tolerate surrounding whitespace, e.g. a trailing newline
                last_checksum = file_obj.read().strip()
        except Exception:
            import traceback
            _LOG.info("Failed to open checksumfile for {0}".format(self.filename))
            _LOG.debug("Checksum file open traceback\n" + traceback.format_exc())
            # an unreadable checksum file is treated like a missing one
            return False
        checksum_passed = (self.checksum == last_checksum)
        if not checksum_passed:
            _LOG.debug("UnzipCheck: checksum failed for: {0}, decompress needed"
                       .format(self.filename))
        return checksum_passed

    @property
    def file_list(self):
        """Get list of files that should have been extracted from the file for this object

        Note: this only works well if the tar is for a single file OR the format is zip, other formats
        require a complete decompression and we don't do that

        Returns:
            list of paths inside the directory of the archive; empty when the
            contents are not known
        """
        if self.expected_files:
            names = self.expected_files
        elif self.filename.endswith(".zip"):
            with zipfile.ZipFile(self.filepath, "r") as myz:
                names = myz.namelist()
        else:
            names = []
        # make sure it is relative to the directory that was given as
        # the filepath
        return [os.path.join(self._startdir, f) for f in names]

    def check_files_exist(self):
        """see if the specified files that should be in this compressed file exist"""
        return all(os.path.exists(f) for f in self.file_list)

    def _unzip(self):
        """Assuming filename is a zip, unzip it and return the extracted paths"""
        with zipfile.ZipFile(self.filepath, "r") as myz:
            names = myz.namelist()
            myz.extractall(self._startdir)
        return [os.path.join(self._startdir, f) for f in names]

    def _extract_tar(self, filepath, mode):
        """Extract every member of the tar at ``filepath`` and return the extracted paths"""
        names = []
        # the context manager closes the archive even if a member fails
        with tarfile.open(filepath, mode) as tar:
            for tinfo in tar.getmembers():
                tar.extract(tinfo, self._startdir)
                names.append(tinfo.name)
        return [os.path.join(self._startdir, f) for f in names]

    def _unbz2(self):
        """Assuming filename is a tar.bz2, decompress it"""
        return self._extract_tar(self.filepath, "r:bz2")

    def _untar(self, filepath=None):
        """take a tarfile (default: the file of this object) and extract it"""
        return self._extract_tar(filepath or self.filepath, "r")

    def _unlzma(self):
        """
        assuming file is lzma decompress it. if it is tar, then
        also extract those files

        Returns:
            list with the decompressed file, or the files extracted from the
            intermediate tar
        """
        try:
            import lzma
        except ImportError:  # Python 2: only the backport is available
            from backports import lzma
        outfilename = self.filepath[:-len(".xz")]
        with lzma.open(self.filepath, "rb") as inf:
            with open(outfilename, 'wb') as outf:
                shutil.copyfileobj(inf, outf, 100*1024*1024)
        # only a real ".tar" suffix marks an intermediate tar; a name that
        # merely ends in "tar" (e.g. "guitar") is a plain file
        if not outfilename.endswith(".tar"):
            return [outfilename]
        try:
            return self._untar(outfilename)
        finally:
            # remove the temporary tar file, also when extraction failed
            os.remove(outfilename)

    def write_checksum(self):
        """store the checksum of the archive in the checksum file"""
        with open(self.checksum_file, "w") as file_handler:
            file_handler.write(self.checksum)

    def clean_decompressed(self):
        """if files exist and checksum passes, remove the expected files

        The checksum file is removed in every case (when present), so the
        next ``decompress`` call extracts again.
        """
        if self.check_checksum():
            for fname in self.file_list:
                # file vs. directory?
                if os.path.isdir(fname):
                    shutil.rmtree(fname)
                elif os.path.lexists(fname):
                    os.remove(fname)
        # the checksum file is missing whenever check_checksum() failed for
        # that reason; that must not turn the clean-up into an error
        if os.path.exists(self.checksum_file):
            os.remove(self.checksum_file)


def clean_decompressed(directory, olderthan=None, recursive=True):
    """
    Remove decompressed outputs whose compressed source is still present.

    Walks ``directory`` and, for every supported compressed file, looks for
    the output with the same name minus the compression extension.

    Args:
        directory : directory to look for compressed files in
        olderthan : datetime object so we only delete if older than the
            specified time; may be naive (local time) or timezone-aware
        recursive : when False only ``directory`` itself is searched and its
            sub-directories are skipped

    **Note**: currently this really only works for single file/directory
    compressions like .tar.xz, and only outputs ending in ``.lmdb`` are
    actually removed; other outputs are left untouched.
    """
    for root, _dirs, files in os.walk(directory):
        for filename in files:
            filepath = os.path.join(root, filename)
            if not UnzipCheck.supports(filepath):
                continue
            unzip = UnzipCheck(filepath, single=True)
            if not unzip.check_files_exist():
                continue
            # The checksum is deliberately not consulted here: it only tells
            # whether the archive changed, not whether the decompressed
            # files were modified locally.
            for fname in unzip.file_list:
                if olderthan is not None:
                    # build the timestamp in the timezone of ``olderthan`` so
                    # naive and aware datetimes can both be compared
                    file_timestamp = datetime.fromtimestamp(
                        pathlib.Path(fname).stat().st_mtime,
                        tz=olderthan.tzinfo,
                    )
                    # if file is newer than the datetime provided, then dont delete it
                    if file_timestamp > olderthan:
                        continue
                if not fname.endswith(".lmdb"):
                    continue
                # first attempt removing just the data.mdb, we dont want
                # rm tree to delete other files if the main db is open
                data_mdb = os.path.join(root, fname, "data.mdb")
                try:
                    os.remove(data_mdb)
                    unzip.clean_decompressed()
                    _LOG.result(f"Cleaned-up {filepath}")
                except PermissionError:
                    _LOG.result(f"Skipping {filepath}, it must be open")

        if not recursive:
            # the first os.walk entry is ``directory`` itself
            break

