
# INTEL CONFIDENTIAL
# Copyright 2014 2018 Intel Corporation
#
# The source code  contained or  described herein and  all documents related to
# the source code  ("Material") are owned by Intel Corporation or its suppliers
# or licensors.  Title to the  Material  remains with  Intel Corporation or its
# suppliers  and licensors. The Material contains trade secrets and proprietary
# and  confidential  information  of  Intel  or  its  suppliers  and  licensors.
# The Material  is protected  by worldwide  copyright and trade secret laws and
# treaty provisions.  No part of the Material  may  be used, copied, reproduced,
# modified, published, uploaded, posted, transmitted, distributed, or disclosed
# in any way without Intel's prior express written permission. No license under
# any  patent,  copyright, trade secret or other intellectual property right is
# granted  to  or conferred upon you by disclosure or delivery of the Materials,
# either expressly, by implication, inducement, estoppel or otherwise.
# Any license under such intellectual property rights must be express and
# approved by Intel in writing.




# LMDB database
#    databases are:
#            components, nodes, general
#
#    component database:
#        - key = component path
#        - value =
#            data = dict(
#                name           = self.definition.name,
#                sub_components = self.definition.sub_components.keys(),
#                nodenames      = self.definition.nodenames,
#                info           = self.definition.info,
#                value_class    = vclass, # module:classname
#            )
#
#    node database
#        - key = component path + node name
#        - value = pickled nodes
#
#        idea:
#        - value =
#            nodetype = Node,Register,Field,Array
#            nodenames = child nodes
#            info = general information
#            accesses = {}
#            access_group = {}
#
#    general
#        - fileformat_version
#        - namednodes_version_required
#        - namednodes_version_builtwith
#
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from namednodes.utils.ordereddict import odict
import namednodes

# import bson # not used...but a possible future option instead of pickle
import weakref
from ..utils import unzip
from ..utils._py2to3 import *
if PY2:
    import cPickle as pickle
    from cStringIO import StringIO
else:
    import pickle as pickle
from io import BytesIO
# only for really old pythons do we do this hack
if sys.version_info[0:2] == (2,7) and sys.version_info[2] <= 3:
    from cStringIO import StringIO as BytesIO
from cached_property import threaded_cached_property
import os
# attempt LMDB import but set to None if not available
try:
    if sys.platform == 'win32':
        import lmdb_m as lmdb
    else:
        import lmdb
except ImportError:
    lmdb = None

try:
    import lmdb as lmdb_original
except ImportError:
    lmdb_original = None

import time
import importlib
import tarfile
import types
import sys
from distutils.version import StrictVersion
from ..logging import getLogger
from six import PY2, PY3
if PY2:
    import imp
    from collections import MutableMapping
if PY3:
    import importlib as imp
    from importlib.machinery import SourceFileLoader
    from collections.abc import MutableMapping


from .. import settings
from ..comp import (
    ClearComponentCache,
    ComponentPlugin,
    ComponentDefinitionPlugin,
    ComponentLoaderPlugin,
    NamedComponent,
    NamedComponentDefinition
    )

from ..nodes import (
    NamedNodeArrayDefinition,
    NamedNodeArrayItemDefinition,
    NamedNodeDefinition,
    NodeDefinitionPlugin,
    NodeTypes
    )

from ..registers import (
    RegisterDefinition,
    RegisterComponent,
    FieldDefinition
    )

from svtools.intel_version import IntelVersion
from ..errors import VersionError

### Changes from 1.1 to 1.2
### - nodenames is now kept as a separate entry in the database, instead of
###   being part of the component dictionary that always gets loaded
###
### Changes from 1.2 to 1.3
### - pickles are always written and read using bytes
###
# file format version constant for this module
# NOTE(review): the change notes above describe a 1.2 -> 1.3 change, but this
# value is still "1.2" -- confirm which version is intended
_LMDB_FILE_VERSION = StrictVersion("1.2")

# module level logger
_LOG = getLogger()

def onerror(func, path, exc_info):
    """
    Error handler for ``shutil.rmtree``.

    If the path is not writable (e.g. a read only file) it adds write
    permission for the owner and then retries the operation that failed.

    If the path is already writable, the failure has some other cause and
    the original error is re-raised.

    Usage : ``shutil.rmtree(path, onerror=onerror)``

    Args:
        func : function that raised; it is retried as ``func(path)``
        path : path that ``func`` was operating on
        exc_info : ``(type, value, traceback)`` tuple describing the failure
            (an exception instance is tolerated as well)

    Raises:
        The exception carried by ``exc_info``; a plain ``Exception`` when
        ``exc_info`` does not hold one.
    """
    import stat
    if not os.access(path, os.W_OK):
        # looks like an access error: make the path writable and retry
        os.chmod(path, stat.S_IWUSR)
        func(path)
        return

    # path is already writable, so permissions were not the problem;
    # hand the real error back to the caller instead of hiding it
    if isinstance(exc_info, BaseException):
        original = exc_info
    elif exc_info:
        original = exc_info[1]
    else:
        original = None
    if isinstance(original, BaseException):
        raise original
    raise Exception("could not change permissions for removing tree")

import gc
###### Pickle, no compression
def marshal_out_pickle(obj):
    """Return ``obj`` pickled (uncompressed) as a byte string."""
    # protocol 2 is the highest one supported by python2.7
    protocol = 2
    return pickle.dumps(obj, protocol)

def marshal_in_pickle(obj):
    """Unpickle the bytes in ``obj``; ``None`` is passed straight through."""
    if obj is None:
        return None
    stream = BytesIO(obj)
    # only python3's pickle.load takes (and gets) an explicit encoding
    load_kwargs = {} if PY2 else {"encoding": "utf-8"}
    return pickle.load(stream, **load_kwargs)

###### ZLIB + Pickle
###### WARNING!!! zlib maxes out at 2GB
import zlib
def marshal_out_pickle_zlib(obj,compress_amount=3):
    """Pickle ``obj`` with the highest protocol, then zlib compress the result.

    Args:
        obj : object to serialize
        compress_amount : zlib compression level handed to ``zlib.compress``
    """
    pickled = pickle.dumps(obj, -1)
    return zlib.compress(pickled, compress_amount)

def marshal_in_pickle_zlib(obj):
    """Inverse of ``marshal_out_pickle_zlib``; ``None`` is passed straight through."""
    if obj is not None:
        raw = zlib.decompress(obj)
        return pickle.loads(raw)
    return None

###### BZ2 + Pickle
def marshal_out_pickle_bz2(obj, compress):
    """Pickle ``obj`` with the highest protocol, optionally bz2 compressing it.

    Args:
        obj : object to serialize
        compress : when true the pickle is bz2 compressed (level 3),
            otherwise the raw pickle bytes are returned
    """
    import bz2
    payload = pickle.dumps(obj, -1)
    if not compress:
        return payload
    return bz2.compress(payload, 3)

def marshal_in_pickle_bz2(obj):
    """Decompress the bz2 data in ``obj`` and unpickle the result.

    ``None`` is passed straight through, matching the other ``marshal_in_*``
    helpers in this module.

    Note: this only reads data that was actually bz2 compressed, i.e. what
    ``marshal_out_pickle_bz2`` produces when ``compress`` is true.
    """
    if obj is None:
        return None
    import bz2
    return pickle.loads( bz2.decompress( obj ) )

def marshal_out_ujson(obj):
    """Serialize ``obj`` with ``ujson``, leaving non-ASCII characters unescaped."""
    from ujson import dumps as _ujson_dumps
    return _ujson_dumps(obj, ensure_ascii=False)

def marshal_in_ujson(obj):
    """Parse the JSON data in ``obj`` using ``ujson``.

    ``None`` is passed straight through, matching the other ``marshal_in_*``
    helpers in this module.
    """
    if obj is None:
        return None
    import ujson
    # ujson.load wants a file-like object, so wrap the raw data in a stream
    if PY2:
        obj = StringIO(obj)
    else:
        obj = BytesIO(obj)
    return ujson.load(obj)



class DBManager(object):
    """
    Owns one LMDB environment (a directory on disk) and provides marshalled
    read/write access to the named databases inside of it.

    Every instance registers itself in the class level ``managers``
    dictionary keyed by ``dbpath``; use :meth:`get` to re-use a manager that
    is already open instead of constructing a new one.
    """
    #: path to the DB file that this manager points to
    dbpath = None
    #: for pickle
    protocol = -1
    #: for writing to database
    env = None
    #: for if we are open in read-only and flushing data is not an option
    _read_only = False
    # we need to keep track of open managers
    # for when we unpickle
    managers = {}
    known_databases = [b'components', b'nodes', b'general']
    max_dbs = len(known_databases)+1
    closed = False

    @classmethod
    def close_all(cls):
        """used to close all open lmdb files, specifically this is used in testing"""
        for db in cls.managers.values():
            db.close()
        cls.managers.clear()
        # must clear out the high level component cache when we close all the databases
        ClearComponentCache()

    @classmethod
    def get(cls, dbpath, **kwargs):
        """Returns a DBManager instance. All arguments are optional except the dbpath

        Args:
            dbpath : path to database file
            access : open for read or read-write
            maxsize : default database size
            incsize : amount to increase size by if a write fails
            marshal_type : type of marshalling to use in/out
                ('pickle' is the default, 'pickle_zlib' is also supported)

        If the database is already open, the existing DBManager is returned
        (the keyword arguments are not compared against the ones used for
        the first open).

        If the previous manager was closed, or access='w' is requested, a new
        manager is created, which closes the old environment and reopens it.
        """
        # make sure we normalize the path so that we are doing our lookup correctly
        dbpath = os.path.normpath(dbpath)
        if dbpath in cls.managers:
            prev = cls.managers[dbpath]
            # if closed or someone needs to get 'w' specifically, then
            # we should close and return new one
            if prev.closed or kwargs.get('access','rw')=='w':
                return cls(dbpath,**kwargs)
            else: # still open, so return what we have
                return prev
        else:
            return cls(dbpath,**kwargs)

    def __init__(self, dbpath="dict.lmdb",
                 access='rw',
                 maxsize=2*1024*1024*1024,
                 incsize=1*1024*1024*1024,
                 marshal_type="pickle"):
        """
        Args:
            dbpath : path to database file
            access : open for read or read-write
            maxsize : default database size
            incsize : amount to increase size by if a write fails
            marshal_type : type of marshalling to use in/out; 'pickle' and
                'pickle_zlib' are supported ('bz2' is recognized but raises
                "Not supported")
        """
        self.dbpath = dbpath
        self._incsize = incsize
        self._compdb = None
        self._nodedb = None
        # if it was already there...close it
        if self.dbpath in self.managers:
            self.managers[self.dbpath].env.close()

        # open database, this is first open, not a database size increase
        self._mapsize = self.open(access, maxsize)

        # make sure our known databases exist
        if access != 'r':
            for dbname in self.known_databases:
                self.env.open_db(dbname)

        # dictionary style wrappers around each of the known databases
        # (general information is mostly for debug)
        self.general_db = DbInfo(self, b"general")
        self.components_db = DbInfo(self, b"components")
        self.nodes_db = DbInfo(self, b"nodes")

        # now add this current one
        self.managers[self.dbpath] = self

        assert marshal_type in ['pickle_zlib','pickle','bz2'],"%s not supported for marshal type"%marshal_type
        self.marshal_type = marshal_type
        if marshal_type=="pickle_zlib":
            self.marshal_in = marshal_in_pickle_zlib
            self.marshal_out = marshal_out_pickle_zlib
        elif marshal_type=="pickle":
            # this was pre-python3 support and has issues due to writing ascii in 2 and reading bytes in 3
            self.marshal_in = marshal_in_pickle
            self.marshal_out = marshal_out_pickle
        elif marshal_type=="bz2":
            raise Exception("Not supported")

        # NOTE: background writers (a thread running _thread_write, or
        # WriteProc processes fed from a queue) were experimented with here
        # but are disabled, so no ``self.queue`` is created.

    def _thread_write(self):
        """Worker loop that writes ``(database, datadict)`` items taken from ``self.queue``.

        NOTE: ``__init__`` does not create ``self.queue`` (the code doing so
        is disabled), so this is currently not started by this class.
        """
        while True:
            database, datadict = self.queue.get()
            marshal = database != b"general"
            db = self.env.open_db(database)
            with self.env.begin(write=True, db=db) as txn:
                # hopefully this sets txn to point to our db
                # .items() so that this works on python3 as well as python2
                for key, v in datadict.items():
                    if marshal:
                        txn.put( key, self.marshal_out( v ), dupdata=False, overwrite=True)
                    else:
                        txn.put( key, v, dupdata=False, overwrite=True)

    def __del__(self):
        # when we are getting deleted, make sure we close environment first
        self.close()

    def close(self):
        """Close the environment (if still open) and mark this manager as closed.

        The manager is intentionally left in ``managers``; :meth:`get` checks
        the ``closed`` flag and reopens when needed.
        """
        if not self.closed and self.env is not None:
            self.env.close()
        # always set this...
        self.closed = True

    def open(self, access, mapsize):
        """
        Args:
            access  : how to open the database: 'r','w','rw'
            mapsize : size of file to open

        Returns:
            mapsize : resulting size of the database

        If access is "w", then we will delete an existing database file if it exists.
        An existing data file that is not writable, or the LMDB_FORCE_RO
        setting, turns the access in to read-only.
        """
        # also relies on "dbpath" and "max_dbs" for the open call

        if self.env is not None:
            self.env.close()

        # IF DB exists we need to:
        # - clear out the old 'mdb' if the access='w'
        # - get the existing size of the file for the next open (vs. using mapsize)
        # default "oldsize" to mapsize
        oldsize = mapsize
        if os.path.exists(self.dbpath+"/data.mdb"):
            # if we are supposed to override the database, then remove it
            if access=='w':
                # remove any files in the db directory...we don't delete the WHOLE dir
                # in case there is some other file that has been added...like an access py file :)
                for f in os.listdir( self.dbpath ):
                    if f.endswith(".mdb"):
                        os.remove( os.path.join(self.dbpath,f) )
            # if file exists and we are supposed to keep it, then use it for file size
            else:
                oldsize = os.stat(self.dbpath+"/data.mdb").st_size
                mapsize = max(mapsize, oldsize)
                # see if we are able to write to the existing file
                writeable = os.access(self.dbpath+"/data.mdb", os.W_OK)
                # turn our access in to readonly...
                if writeable is False:
                    access = 'r'
        if settings.LMDB_FORCE_RO:
            access = 'r'
        # do actual open
        if access == 'r': # read only:
            # read-only always uses the original lmdb module (not the
            # win32 specific lmdb_m one)
            self.env = lmdb_original.open(self.dbpath,
                                 max_dbs=self.max_dbs,
                                 map_size=mapsize,
                                 readonly=True,
                                 lock=False)
            self._read_only = True
        else: # either write or read/write
            kwargs = dict(
                path=self.dbpath,
                max_dbs=self.max_dbs,
                map_size=mapsize,
                writemap=False,
                map_async=False,
                sync=False,
                lock=True,
                meminit=False,
                metasync=False,
            )
            # be wary of write optimizations because we might crash DB often trying to find
            # the best mapsize that we can
            try:
                self.env = lmdb.open(**kwargs)
                self._read_only = False
            except (lmdb.ReadonlyError, lmdb.ReadersFullError, lmdb.Error) as e:
                # always use the legacy/default one if we are opening in read only mode
                # this is due to the win32 sparse file version not supporting it
                import lmdb as lmdb_legacy
                # 99% of the time, if we get the readers full, that means that
                # it is in a shared environment that should have been RO anyway
                kwargs['readonly']=True
                kwargs['lock'] = False
                kwargs['map_size'] = oldsize
                self.env = lmdb_legacy.open(**kwargs)
                self._read_only = True

        self.closed = False
        return mapsize

    def read(self, database, key, default=None):
        """return the value stored in 'database' for the specified key

        Args:
            database (bytes) : name of database to read from
            key : key to look up (python3 ``str`` keys are encoded to bytes)
            default : returned as-is when the key is not in the database

        Returns:
            the stored value; it is un-marshalled first for every database
            except b"general", which holds raw values
        """
        marshal = database!=b"general"
        with self.env.begin(buffers=True) as txn:
            db = self.env.open_db(database, txn)
            if not PY2 and isinstance(key, str):
                key = key.encode()
            # not sure why this is needed on writes, but on reads
            # can cause python2 to fail...so don't add this line
            # while we support python2, even though it looks like
            # it should be needed....
            # key = bytes(key)
            value = txn.get(key, default=default, db=db)
            if value is default:
                return value
            # buffers=True hands back a view in to the database, copy it out
            if not PY2 and type(value) is memoryview:
                value = value.tobytes()
            elif PY2 and type(value) is buffer and not marshal:
                value = str(value)
            if marshal:
                return self.marshal_in(value)
            else:
                return value

    def write(self, database, key, data):
        """
        performs a write that is intended to always succeed, growing the map
        size and retrying when the database reports that it is full

        Args:
            database (bytes) : name of database to write to
            key : key to store under (python3 ``str`` keys are encoded to bytes)
            data : value to store; marshalled first unless database is b"general"
        """
        if database != b"general":
            dataout = self.marshal_out( data )
        else:
            dataout = data
        try:
            with self.env.begin(write=True) as txn:
                db = self.env.open_db(database, txn)
                if not PY2 and isinstance(key, str):
                    key = key.encode()
                key = bytes(key)
                txn.put( key, dataout, dupdata=False, overwrite=True, db=db)
        except lmdb.MapFullError:
            # commit what we have so far...
            self._dummy_txn()
            self._mapsize += self._incsize
            self.env.set_mapsize(self._mapsize)
            # call again
            self.write(database, key, data)

    def _dummy_txn(self,database=b"general"):
        """
        performs a dummy transaction to help with cases where we are
        overwriting previous keys that are large
        """
        self.write(database,b"__dummy__",b"")
        self.write(database,b"__dummy__",b"")

    def write_multiple(self, database, datadict):
        """write the given dictionary to the specified database in one transaction

        Args:
            database (bytes) : name of database to write to
            datadict (dict) : key=database key, value=data to write to database

        The garbage collector is turned off for the duration of the write and
        put back the way the caller had it afterwards.
        """
        marshal = database != b"general"
        # remember the collector state so that we do not turn it on for a
        # caller that deliberately had it disabled
        gc_was_enabled = gc.isenabled()
        try:
            gc.disable()
            with self.env.begin(write=True, buffers=True) as txn:
                db = self.env.open_db(database, txn)
                # hopefully this sets txn to point to our db
                for key, v in datadict.items():
                    # need in python3 always, and in python2 if the database was created
                    # with python2
                    if not PY2 and isinstance(key, str):
                        key = key.encode()
                    key = bytes(key)
                    if marshal:
                        txn.put( key, self.marshal_out( v ), dupdata=False, overwrite=True, db=db)
                    else:
                        txn.put( key, v, dupdata=False, overwrite=True, db=db)
        except lmdb.MapFullError:
            # commit what we have so far...
            self._dummy_txn()
            self._mapsize += self._incsize
            self.env.set_mapsize(self._mapsize)
            # call again
            self.write_multiple(database, datadict)
        finally:
            if gc_was_enabled:
                gc.enable()


    def delete(self, database, key):
        """Delete a key from the database
        Args:
            database (bytes) : database to delete the key from
            key : key to remove from the database (text keys are encoded to
                bytes, the same as for read and write)
        """
        # same intent as the ``not PY2 and isinstance(key, str)`` checks in
        # read/write: on python2 ``str`` is ``bytes`` so this never triggers
        if isinstance(key, str) and not isinstance(key, bytes):
            key = key.encode()
        db = self.env.open_db(database)
        with self.env.begin(db=db, write=True) as txn:
            txn.delete(key)

    @property
    def read_only(self):
        """whether we opened the database in read only mode"""
        return self._read_only

    def sync(self):
        """force a sync of the database"""
        self.env.sync()


class DbInfo(MutableMapping):
    """
    This provides a wrapper for reading/writing information from the db
    as if it were a dictionary. It is not meant for large amounts of
    storage, and is not meant to have high performance since it does a
    transaction at a time.

    It is primarily used for easing the storing of general information
    regarding the data file like:

        - fileformat_version
        - namednodes_version_required
        - namednodes_version_builtwith

    Note::
        Reading a key that is not in the database returns ``None`` instead
        of raising ``KeyError``; use ``in`` to test for presence.

    """
    #: database manager to use for transaction
    _dbmgr = None
    #: name of database to retrieve info from
    _database_name = None
    #: whether we should compress during marshalling
    compress = None
    #: whether we should marshal data, or send string directly
    marshal = None
    #: private marker used to tell "key missing" apart from a stored value
    _MISSING = object()

    def __init__(self, dbmgr, database_name):
        """
        Args:
            dbmgr : manager for the database
            database_name : name of database within the larger database that we should pull from
        """
        self._dbmgr = dbmgr
        self._database_name = database_name

    def __getitem__(self, item):
        # the manager's read defaults to None, so missing keys give None
        return self._dbmgr.read(self._database_name, item)

    def __contains__(self, item):
        # the inherited implementation relies on __getitem__ raising KeyError,
        # which ours never does, so it would report every key as present
        found = self._dbmgr.read(self._database_name, item, self._MISSING)
        return found is not self._MISSING

    def get(self, item, default=None):
        return self._dbmgr.read(self._database_name, item, default)

    def __setitem__(self, item, value):
        return self._dbmgr.write(self._database_name, item, value)

    def __delitem__(self, item):
        return self._dbmgr.delete(self._database_name, item)

    def __len__(self):
        # walks every key in the database, one at a time
        return sum(1 for _ in self)

    def __iter__(self):
        """yield every key of the database, decoded to a string"""
        db = self._dbmgr.env.open_db(self._database_name)
        with self._dbmgr.env.begin(db=db) as txn:
            with txn.cursor() as cursor:
                for key, _value in cursor:
                    yield key.decode()

    def keys(self):
        """return the keys; always a real list on python3 (not a view)"""
        if PY2:
            return super(DbInfo, self).keys()
        else:
            return list(super(DbInfo, self).keys())

class LmdbComponent(NamedComponentDefinition):
    """
    Component definition whose contents live in an lmdb database and are
    pulled in lazily: node names, nodes and sub components are only read
    from the database when they are first needed.
    """
    # database path to our component
    _key = None
    # database manager object for talking to db
    _dbmgr = None
    # dictionary of sub components, where value may be None until we pull from database
    _sub_components = None

    def __init__(self, key, dbmgr, parent):
        """
        Yes, different constructor than ComponentDefinition, used to build Definition
        that can talk to lmdb

        Args:
            key : path of this component, used (encoded) as the database key
            dbmgr : database manager used for all reads
            parent : parent component definition, or None for the top level one
        """
        self._key = key
        self._bKey = key.encode()
        self._dbmgr = dbmgr
        # hold reference to true parent to survive when we clear the cache
        self._parent = parent
        self._init_from_db()
        # used to know when we have loaded nodenames
        self._names_loaded = False
        # to track what nodes have been removed in case we get a "save" call
        self._removed_nodes = set()


    def _init_from_db(self):
        """used during __init__ and after a save to pull our info from the database"""
        try:
            dbdata = self._dbmgr.components_db[self._bKey]
        except Exception:
            # log which component/file failed, then let the error continue on
            _LOG.error("Error getting component: {0}, in file {1}".format(self._key,self._dbmgr.dbpath))
            raise
        # get value class from database ("module:classname")
        # now create component definition from parent class
        # nodes should start off as empty here during constructor
        modname, classname = dbdata['value_class'].split(":")
        mod = importlib.import_module(modname)
        classobj = getattr(mod,classname)
        NamedComponentDefinition.__init__(self, dbdata['name'], dbdata['info'], {}, classobj)
        # fix parent and origin back
        self.parent = self._parent
        if self.parent is None:
            self.origin = weakref.proxy(self)
        else:
            self.origin = self.parent.origin
        # remove any nodes dict that got created, so that our cached property works
        del self._nodes

        # early versions of database always loaded nodenames
        self._nodenames_v1 = dbdata.get('nodenames', None)

        self._sub_components = odict()
        for sub in dbdata['sub_components']: # create now to keep order
            self._sub_components[sub] = None
        self._sub_components_all_loaded = False

        #### Accesses
        # if our definition has access module make sure it is loaded
        # if it has a path AND a module, then we need to create a new module
        # and load that access code in to it
        if self.info.get('access_modname') is not None:
            modname = self.info.get('access_modname')
            # we should always re-do this in case of stepping changes and we need to reload
            import warnings
            warnings.filterwarnings("ignore", category=RuntimeWarning, module=modname)
            access_filename = self.info.get("access_filename", None)
            if access_filename is not None:
                # update accessfilename to be relative to the lmdb file
                access_filename = os.path.normpath(
                    os.path.join(self._dbmgr.dbpath, access_filename)
                )
                if not os.path.exists(access_filename):
                    raise Exception("Missing the specified access filename: %s"%access_filename)
                if PY2:
                    sys.modules[modname] = imp.load_source( modname, access_filename)
                else:
                    loader = SourceFileLoader( modname, access_filename)
                    mod = types.ModuleType(loader.name)
                    # register before executing, then run the file's code in it
                    sys.modules[modname] = mod
                    loader.exec_module(mod)

            else: # we have mod but not file, just load access
                sys.modules[modname] = importlib.import_module( modname )

    # I don't recall why we could not use threaded_cached_property here...but you cannot
    @property
    def _nodenames(self):
        """node names for this component, read from the database on every access"""
        nodenames = self._dbmgr.components_db[self._bKey+b".nodenames"]
        if nodenames is None: # check for older info
            if self._nodenames_v1 is not None:
                nodenames = self._nodenames_v1
            else:
                raise VersionError("database version not understood")
        return nodenames

    @threaded_cached_property
    def _nodes(self):
        """this is a cached property so that we can lazily load all our node information"""
        #### Get our nodenames from cache or database
        # note we are using _nodenames, not the public one
        nodenames = self._nodenames
        # every node starts out as None, get_node fills them in on demand
        nodes = odict(list(zip(nodenames, [None]*len(nodenames))))
        self._names_loaded = True
        return nodes

    def __contains__(self, item):
        # once the names are loaded use the dictionary, otherwise fall back
        # to nodenames (slow since it checks DB, but this should be rarely used)
        if self._names_loaded:
            return item in self._nodes
        else:
            return item in self._nodenames

    def _clear_cache(self,recursive=True):
        """drop cached nodes (and those of our sub components) and re-read from the database"""
        # this will wipe out any additions that were not saved
        # by deleting our attribute we will cause the cached property to get redone
        # on the next access to _nodes
        self.__dict__.pop('_nodes',None)
        if recursive:
            # NOTE(review): going through the sub_components property creates
            # any sub component that was not loaded yet -- confirm that is wanted
            for subcomp in self.sub_components.values():
                if subcomp is not None and isinstance(subcomp, LmdbComponent):
                    subcomp._clear_cache()
        # re-initialize from db
        self._init_from_db()

    @property
    def _all_node_paths(self):
        """not for general use, only exists in lmdb"""
        # can consider adding this as public, but then it needs to be added
        # to higher level component
        paths = self._dbmgr.components_db[self._bKey+b".node_paths"]
        # old method had them in the compdict
        if paths is None:
            return self._dbmgr.components_db[self._bKey]['node_paths']
        else:
            return paths

    @property
    def sub_components(self):
        """ordered dictionary of sub components, created from the database on first use"""
        if self._sub_components_all_loaded is True:
            return self._sub_components
        # created to have same function as value class
        for subname, subobj in self._sub_components.items():
            if self._sub_components[subname] is None:
                newcomp = LmdbComponent( self._key + "." + subname, self._dbmgr, self )
                self._sub_components[subname] = newcomp
        self._sub_components_all_loaded = True
        return self._sub_components

    def get_node(self, nodename, default=KeyError):
        """return the definition for the requested node, loading it from the database if needed

        Args:
            nodename : name of the node to return
            default : returned when the node name is not known; when left as
                KeyError a KeyError is raised instead

        Raises:
            KeyError : unknown node name and no default given
            AttributeError : name is known but the node database has no entry
        """
        # False = name not known, None = known but not loaded yet
        node = self._nodes.get(nodename, False)
        if node is False and default is KeyError:  # do not do == check
            raise KeyError("Unknown node: %s on component %s"%(nodename, self.path))
        elif node is False:  # default must be specified
            return default
        elif node is not None:
            # not False and not None means this is valid object
            return node

        # known, but not loaded yet: pull it from the node database
        node_dict = self._dbmgr.nodes_db[self._bKey+b"."+nodename.encode()]
        if node_dict is None:
            raise AttributeError("Unknown node: %s"%nodename)
        node = _node_from_dict(node_dict)
        node.component = weakref.proxy(self)
        if CacheContext.cache is not None: # specific action requested
            keep = CacheContext.cache and settings.LMDB_DEFINITION_CACHE == True
        else:
            # no context active: component info and global setting decide
            keep = self.info.get("lmdb_cache_nodes", True) and settings.LMDB_DEFINITION_CACHE
        if keep:
            self._nodes[nodename] = node
        return node

    def remove_node(self, nodename):
        """removes node from data"""
        # needed to be different than main, so that we can track what was removed
        super(LmdbComponent, self).remove_node(nodename)
        self._removed_nodes.add(nodename)

def _node_to_dict( node, pathlist, array_info):
    """given the specified node, return a dictionary that can be copied in to the data

    Args:
        node : node object to convert
        pathlist : list to add all the paths found to.
        array_info : dictionary for names of arrays and item_keys we have seen

    Returns:
        node_dict : dictionary representing the node(s); children are
            converted recursively and stored under 'nodes'
    """
    # only needed if we decide to write ALL nodes to comp
    #if len(node.path)>=512:
    #    raise Exception("node path too long...uh-oh...")

    # saving takes a bit longer, but make sure we are saving everything
    # as our own odict vs. whatever was passed in
    node_info = odict(node.info) if not isinstance(node.info,odict) else node.info
    node_accesses = odict(node.accesses) if not isinstance(node.accesses, odict) else node.accesses

    nodedict = dict(
        name=node.name,
        type=node.type,  # register,field,array,etc...
        # not 100% sure we need this for field (?)
        access_group=node.access_group,
        info=node_info,
        accesses=node_accesses,
        nodes=[],
    )

    if node.type == NodeTypes.Array:
        array_info[node.name] = node.item_keys
        nodedict['item_keys'] = node.item_keys
        nodedict['item_definition'] = _node_to_dict( node.item_definition,
                                                    pathlist,
                                                    array_info )

    # add our nodepath as is (for an array this lands after the paths
    # that its item definition added)
    pathlist.append( node.nodepath )

    # now add children in to the dictionary
    for child in node.nodes:
        nodedict['nodes'].append(
            _node_to_dict( child, pathlist, array_info)
            )

    return nodedict

def _node_from_dict( nodedict ):
    """Rebuild a node definition, and recursively its children, from a dict.

    The dictionary is expected to use the keys read below: ``type``,
    ``name``, ``info``, ``access_group``, ``accesses`` and ``nodes``, plus
    ``item_keys`` and ``item_definition`` for array nodes.

    Args:
        nodedict : dictionary describing one node and, nested under
                   ``nodes``, its children

    Returns:
        the reconstructed node definition object

    Raises:
        ValueError : if ``nodedict['type']`` is not a known node type
    """
    nodetype = nodedict['type']
    # __new__ is used on purpose so that __init__ is skipped (speed); the
    # attributes __init__ would have created are filled in by hand below
    if nodetype == NodeTypes.General:
        node = NamedNodeDefinition.__new__(NamedNodeDefinition)
    elif nodetype == NodeTypes.Register:
        node = RegisterDefinition.__new__(RegisterDefinition)
    elif nodetype == NodeTypes.Field:
        node = FieldDefinition.__new__(FieldDefinition)
    elif nodetype == NodeTypes.Array:
        node = NamedNodeArrayDefinition.__new__(NamedNodeArrayDefinition)
    elif nodetype == NodeTypes.ArrayItem:
        # NOTE(review): __new__ is looked up on NamedNodeArrayDefinition but
        # instantiates NamedNodeArrayItemDefinition - kept as is, confirm
        # this is intended
        node = NamedNodeArrayDefinition.__new__(NamedNodeArrayItemDefinition)
    else:
        # previously fell through and failed later with an unbound 'node'
        raise ValueError("Unknown node type: %r" % (nodetype,))

    # to speed thing up by skipping __init__, so we have to add this ourselves
    node._nodes = odict()
    node.name = nodedict['name']
    node.info = nodedict['info']

    node.access_group = nodedict['access_group']
    node.accesses = nodedict['accesses']
    # compare by value (like the chain above): a type value that was loaded
    # from disk is not guaranteed to be the identical object
    if nodetype == NodeTypes.Array:
        node._item_keys = nodedict['item_keys']
        node.item_definition = _node_from_dict( nodedict['item_definition'] )
        node.item_definition.parent = node

    for child in nodedict['nodes']:
        # hack for speed due to knowing about how nodes and odict work
        new_n = _node_from_dict(child)
        node._nodes._data[new_n.name] = new_n
        node._nodes._order.append(new_n.name)
        #weakref?
        new_n.parent = node

    return node


# this is currently NOT thread safe...
class CacheContext(object):
    """
    For controlling whether we cache definition objects after we pull from disk

    Used as a context manager: entering stores the requested choice in the
    class attribute ``cache``; leaving resets ``cache`` to ``default`` (not
    to whatever value was active before entering).

    Args:
        cache_on : value to place in ``CacheContext.cache`` while the
                   context is active
    """
    # value that ``cache`` is reset to whenever a context exits
    default = None
    # choice currently in effect; stored on the class, so shared by everyone
    cache = None

    def __init__(self, cache_on):
        self._cache_choice = cache_on

    def __enter__(self):
        """Activate the requested choice and return this context object."""
        CacheContext.cache = self._cache_choice
        # returning self makes ``with CacheContext(x) as ctx`` usable
        return self

    def __exit__(self, *args):
        """Reset the choice to ``default``; exceptions are not suppressed."""
        # put back to default
        CacheContext.cache = CacheContext.default


import multiprocessing
class WriteProc(multiprocessing.Process):
    """Process that takes write requests from a queue and stores them in lmdb.

    Args:
        open_args : dictionary of keyword arguments handed to ``lmdb.open``
                    when the process starts running
        queue : object whose ``get()`` returns ``(database, datadict)``
                tuples; ``database`` is the name of the sub database and
                ``datadict`` maps keys to the values to store
    """
    def __init__(self, open_args, queue):
        super(WriteProc, self).__init__()
        self.open_args = open_args
        self.queue = queue

    def run(self):
        """Open the environment and write every request taken off the queue.

        The loop has no break / sentinel handling, so it only ends when an
        exception escapes or the process is stopped from the outside.
        """
        self.env = lmdb.open(**self.open_args)
        while True:
            (database, datadict) = self.queue.get()
            try:
                # garbage collector is switched off for the duration of the
                # bulk write and switched back on in the finally below
                gc.disable()
                # everything except the "general" database gets its values
                # run through marshal_out_pickle
                needs_pickle = database != b"general"
                with self.env.begin(write=True, buffers=True) as txn:
                    db = self.env.open_db(database, txn)
                    # hopefully this sets txn to point to our db
                    # items() rather than iteritems(): the latter does not
                    # exist on Python 3 dictionaries
                    for key, v in datadict.items():
                        if needs_pickle:
                            key = bytes(key)
                            txn.put( key, marshal_out_pickle( v ), dupdata=False, overwrite=True, db=db)
                        else:
                            txn.put( key, v, dupdata=False, overwrite=True, db=db)
                #self.env.sync()
            except lmdb.MapFullError:
                # NOTE(review): _dummy_txn, _mapsize, _incsize and
                # write_multiple are not defined anywhere in this class;
                # this handler looks like it was copied from the database
                # manager and presumably fails with AttributeError if it is
                # ever reached - confirm and fix before relying on it
                # commit what we have so far...
                self._dummy_txn()
                self._mapsize += self._incsize
                self.env.set_mapsize(self._mapsize)
                # call again
                self.write_multiple(database, datadict)
            finally:
                gc.enable()




class LmdbDefinitionPlugin(ComponentDefinitionPlugin):
    name = "tolmdb"
    mgr = None

    @classmethod
    def create(cls, definition):
        """Function for returning a new (or existing) instance of the plugin

        Args:
            definition (obj) : component definition that this plugin will
                               be added to

        Returns:
            - Plugin instance, if this plugin is supported for this component

            - **None** if lmdb is not installed and
              ``settings.LMDB_FORCE_RO`` is not set

        The create function is encouraged to use the component.filepath to
        determine where to pull any plugin specific data from.
        """
        if lmdb is None and not settings.LMDB_FORCE_RO:
            # Logger.warn is a deprecated alias; warning() is the supported name
            _LOG.warning("LMDB module missing")
            return None
        # either lmdb imported fine, or it is missing but that is ok: since
        # everything is RO, we'll use lmdb_original anyway...
        return cls(definition)

    def cache_control(self, cache_on):
        """
        Build the context manager used to choose whether definition objects
        are cached after they have been pulled from disk.

        Args:
            cache_on : caching choice handed to CacheContext

        Returns:
            a new CacheContext created with ``cache_on``
        """
        context = CacheContext(cache_on)
        return context

    def clear_cache(self, recursive=True, silent=False):
        """Drop the node definitions that have been cached for this component.

        Args:
            recursive : passed straight through to the definition's
                        ``_clear_cache``
            silent : when true, do nothing (instead of raising) if the
                     definition is not an LmdbComponent

        Raises:
            RuntimeError : definition is not an LmdbComponent and ``silent``
                           is false
        """
        target = self.definition
        if isinstance(target, LmdbComponent):
            # have to clear out children first...
            target._clear_cache(recursive)
            return
        if silent:
            return
        raise RuntimeError("this component was not created from an ldmb file")

    def write(self, filepath, **kwargs):
        """write this component to the specified lmdb

        Args:
            filepath : path to write to
            close_db : whether to close after writing (default=False)
            compress : whether to generate a compressed .tar.xz file next to
                       the database (default=False); the database is closed
                       before compressing
            recursive : (True)/False whether to write sub components
            first_pass : (True)/False, True for the call that starts the
                       write; sub components are written with False so the
                       version information is only stored once
            access, maxsize, incsize : optional, forwarded unchanged to
                       ``DBManager.get``

        Raises:
            ValueError : if any other keyword argument is passed
        """
        compress = kwargs.pop("compress", False)
        recursive = kwargs.pop("recursive", True)
        close_db = kwargs.pop("close_db", False)
        first_pass = kwargs.pop("first_pass", True)
        # build known db params
        db_params = {}
        for p in ['access', 'maxsize', 'incsize']:
            if p in kwargs:
                db_params[p] = kwargs.pop(p)
        if kwargs:
            raise ValueError("Unexpected keyword arguments: %s"%list(kwargs.keys()))

        self.mgr = mgr = DBManager.get(filepath, **db_params)

        # make sure we write version we built the output file with
        # and the fileformat version
        # ONLY the origin should update this information
        if first_pass:
            mgr.general_db[b'namednodes_version_builtwith'] = str(namednodes.__version__).encode()
            # new database will need at least this version 2.10.0 to be used for it
            # to operate properly (due to search related changes)
            mgr.general_db[b'namednodes_version_required'] = "2.9.99999".encode()
            mgr.general_db[b'fileformat_version'] = str(_LMDB_FILE_VERSION).encode()
            mgr.general_db[b'python_version_builtwith'] = ".".join(
                [str(s) for s in sys.version_info]).encode()

        # we are writing a sub component that is not the origin, remove it temporarily
        if first_pass and self.definition.parent is not None:
            definition_parent = self.definition.parent
            definition_parent.remove_component(self.definition)
        else:
            definition_parent = None

        try:
            node_data, nodelist = self._nodes2node_dict(self.definition.nodes)
            # now write it all out to our database.
            mgr.write_multiple(b"nodes", node_data)

            self._write_comp_data(mgr, nodelist)

            # write out subcomponents
            if recursive:
                for comp in self.definition.sub_components.values():
                    comp.tolmdb.write(filepath, first_pass=False)

        finally:
            # put back on parent if we removed it; test against None (same
            # test that decided the removal) so that a parent object that
            # happens to evaluate as false is still restored
            if definition_parent is not None:
                definition_parent.add_component(self.definition)

        if compress or close_db:
            # we have to close to compress
            self.mgr.close()

        if compress:
            self._compress_lzma( filepath )

    def _write_comp_data(self, mgr, nodelist):
        """
        Store the record describing this component in the "components"
        database; shared by write and save.

        Args:
            mgr : database manager used for writing
            nodelist : node paths belonging to this component; pass None to
                       leave the stored nodenames / node_paths entries alone
        """
        definition = self.definition

        # must happen after the nodes are built so that nodelist is correct
        value_cls = definition.value_class
        vclass = value_cls.__module__ + ":" + value_cls.__name__

        # would rather not have this...need to update c3po to not use
        # OrderedDict, but per the original note fewer OrderedDict references
        # save a decent amount of initial load time
        definition.info = odict(definition.info)
        for key, value in definition.info.items():
            if isinstance(value, dict):
                definition.info[key] = odict(value)

        comp_path = definition.path
        comp_data = {
            comp_path: dict(
                name=definition.name,
                sub_components=list(definition.sub_components.keys()),
                info=definition.info,
                value_class=vclass,
            ),
        }
        if nodelist is not None:
            comp_data[comp_path + ".nodenames"] = definition.nodenames
            comp_data[comp_path + ".node_paths"] = nodelist
        mgr.write_multiple(b"components", comp_data)

        # the top-most component (no parent) is recorded as the origin
        if definition.parent is None:
            mgr.components_db[b'origin'] = definition.path



    def save(self, compress=False):
        """
        Write changes made to the Component or its SubComponents back to
        disk without rewriting the entire component. The cached information
        is cleared afterwards, so components or nodes obtained before this
        call should not be used any more.

        Only call this from the "origin"/top-most component, and only for a
        component that was originally loaded from an lmdb file (as opposed
        to one built with CreateComponentDefinition).

        Args:
            compress : whether to also create a new compressed file for the
                      updated lmdb (default is False)

        Raises:
            Exception : if this is not the top level component, or the
                        component did not come from an lmdb file
        """
        top = self.definition
        if top.parent is not None:
            raise Exception("save can only be called from top level component")
        if not isinstance(top, LmdbComponent):
            raise Exception("top level component has to have come from a lmdb file")

        # the recursive worker assumes it starts at the top level
        dbmgr = top.origin._dbmgr
        self._save(dbmgr)

        if not compress:
            return
        # the database is deliberately left open here: closing it would
        # break any later access
        self.compress()



    def _nodes2node_dict(self, nodes):
        """Turn nodes into the dictionary form that gets written to disk.

        Args:
            nodes : iterable of node definitions

        Returns:
            tuple ``(node_dict, node_paths)``: ``node_dict`` maps each
            node's ``path`` to the result of ``_node_to_dict``;
            ``node_paths`` holds every path collected during conversion,
            in first-seen order with repeats dropped
        """
        collected_paths = []
        converted = {}
        for entry in nodes:
            converted[entry.path] = _node_to_dict(entry, collected_paths, odict())

        # arrays can report the same path more than once: keep the first
        # occurrence only, preserving order
        unique_paths = []
        already_seen = set()
        for path in collected_paths:
            if path in already_seen:
                continue
            already_seen.add(path)
            unique_paths.append(path)
        return converted, unique_paths

    def _save(self, dbmgr):
        """
        Save this component's nodes and component record, then do the same
        for every sub component that is currently loaded, and finally clear
        the cache of this component.

        Args:
            dbmgr : the database to save the definition to

        """
        #########
        # save our own nodes and components
        #########
        node_data = {}
        # used to flag that nothing else may have changed, but nodes were
        # removed. Assigned up front so that it is defined on every path
        # that reaches the check further down.
        nodes_removed = False
        # build big local dictionary for big fast update
        # since we are doing a save, only grab nodes that we have pulled off
        # the disk already
        # if our definitions in an LmdbComponent, assume we can access private
        # variables
        if isinstance(self.definition, LmdbComponent) and (
            dbmgr.read(b"components", self.definition.path.encode()) is not None):
            # entries that are None are skipped below; presumably those are
            # nodes that were never pulled from disk
            node_objs = list(self.definition._nodes.values())
            none_count = node_objs.count(None)
            len_node_objs = len(node_objs)
            # if more than 1/3 of nodes were pulled from disk, assume it is
            # faster to just re-do things
            # make sure it is > 0 first, then check count is less than 2/3
            if len_node_objs and none_count <= (2*len_node_objs/3):
                node_data, nodelist = self._nodes2node_dict(self.definition.nodes)
            else:
                # not "too many" nodes loaded (or no nodes at all): start
                # from the known node paths and only redo the loaded nodes
                nodelist = self.definition._all_node_paths
                for node in node_objs:
                    if node is not None:
                        # code is ugly, but this iterator is faster than appends
                        # here we are making sure we start with a node list that
                        # does not have the node that we are about to add back in
                        nodelist = [ nn
                                    for nn in nodelist
                                        if nn != node.name and not nn.startswith(node.name+".")]
                        node_data[node.path] = _node_to_dict(node, nodelist, odict())
                # now make sure we strip out removed nodes
                if len(self.definition._removed_nodes)>0:
                    nodes_removed = True
                    nodelist = [nn
                        for nn in nodelist
                        if nn.split(".")[0] not in self.definition._removed_nodes
                    ]

        else:
            # if not lmdb, use the typical API of nodes to get the node objects
            node_data, nodelist = self._nodes2node_dict(self.definition.nodes)

        # only write to db if we changed our nodes...
        if node_data:
            # now write it all out to our database.
            dbmgr.write_multiple(b"nodes", node_data)
        elif len(self.definition.nodenames)==0:
            # we dont have nodes...this is not a special case of not
            # needing an update, so leave nodelist alone so that we write
            # the empty list
            pass
        elif not nodes_removed:
            # we didn't update any nodes, and none remove
            # but we definitely have children nodes
            # set nodelist=None to skip updating that part of the database
            nodelist = None

        _LOG.debug("updating comp: %s" % self.definition.path)
        self._write_comp_data(dbmgr, nodelist)

        ############################
        # save our sub components
        ############################
        for sub_name, sub_comp in self.definition._sub_components.items():
            if sub_comp is not None:
                # dont assume sub-comp is an lmdb component
                sub_comp.tolmdb._save( dbmgr )
            else:
                _LOG.debug("skipping comp: %s.%s" % (self.definition.path, sub_name))


        ############################
        # clear our cache
        ############################
        if isinstance(self.definition, LmdbComponent):
            self.definition.tolmdb.clear_cache()


    def compress(self, compression='xz'):
        """creates a new compressed lmdb for the specified component

        The archive is created from the database path of the origin's
        database manager.

        Args:
            compression (str): 'xz' or 'bz2' for type of compressed file to create
                                Default is subject to change

        Raises:
            Exception : if this is not the top level component, or the
                        component did not come from an lmdb file
            ValueError : if ``compression`` is not one of the supported types
        """
        if self.definition.parent is not None:
            # message used to say "save" (copied from save()), which pointed
            # users at the wrong call
            raise Exception("compress can only be called from top level component")
        if not isinstance(self.definition, LmdbComponent):
            raise Exception("top level component has to have come from a lmdb file")
        dbpath = self.definition.origin._dbmgr.dbpath
        if compression == 'xz':
            self._compress_lzma(dbpath)
        elif compression == 'bz2':
            self._compress_bz2(dbpath)
        else:
            raise ValueError("compression type %s not supported"%compression)


    def _compress_bz2(self, filepath):
        """Create ``<filepath>.tar.bz2`` from the database directory.

        Every file under ``filepath`` except the lmdb lock file is added.
        Member names inside the archive start with the name of the database
        directory itself. A checksum file is written for the new archive
        afterwards.

        Args:
            filepath : path of the lmdb directory to compress
        """
        # Member names are taken relative to the directory that contains the
        # database. (Searching for the directory name inside each walked path
        # picked the wrong offset when that name also appeared earlier in
        # the path, or when filepath ended with a path separator.)
        parent_dir = os.path.dirname(os.path.abspath(filepath))
        bz2_path = filepath+".tar.bz2"
        with tarfile.open(bz2_path, "w:bz2", format=tarfile.PAX_FORMAT) as tf:
            for root, dirs, files in os.walk(filepath):
                arc_root = os.path.relpath(root, parent_dir)
                for fname in files:
                    # skip the lock file
                    if fname.lower() == "lock.mdb":
                        continue
                    tf.add(os.path.join(root, fname),
                           os.path.join(arc_root, fname))
        # go ahead and create checksum file also so that we don't immediately
        # try and load from the bz2 file
        unzip.UnzipCheck(bz2_path).write_checksum()


    def _compress_lzma(self, filepath):
        """Create ``<filepath>.tar.xz`` from the database directory.

        Every file under ``filepath`` except the lmdb lock file is first
        collected in a temporary ``<filepath>.tar``, which is then
        compressed and removed again. Member names inside the archive start
        with the name of the database directory itself. A checksum file is
        written for the new archive afterwards.

        Args:
            filepath : path of the lmdb directory to compress
        """
        # import here since we are still working on finalizing this support
        if PY2:
            from backports import lzma
        else:
            import lzma
        # Member names are taken relative to the directory that contains the
        # database. (Searching for the directory name inside each walked path
        # picked the wrong offset when that name also appeared earlier in
        # the path, or when filepath ended with a path separator.)
        parent_dir = os.path.dirname(os.path.abspath(filepath))
        tar_path = filepath+".tar"
        lzma_path = tar_path + ".xz"
        try:
            with tarfile.open(tar_path, "w") as tf:
                for root, dirs, files in os.walk(filepath):
                    arc_root = os.path.relpath(root, parent_dir)
                    for fname in files:
                        # skip the lock file
                        if fname.lower() == "lock.mdb":
                            continue
                        tf.add(os.path.join(root, fname),
                               os.path.join(arc_root, fname))
            # create lzma file from the temporary tar
            with lzma.open(lzma_path, "wb") as outf:
                with open(tar_path, "rb") as inf:
                    while True:
                        # some max number at a time to save on memory
                        data = inf.read(100*1024*1024)
                        if not data:
                            break
                        outf.write(data)
        finally:
            # remove temporary tar file, also when something above failed
            if os.path.exists(tar_path):
                os.remove(tar_path)
        # go ahead and create checksum file also so that we don't immediately
        # try and load from the compressed file
        unzip.UnzipCheck(lzma_path).write_checksum()

class LmdbLoader(ComponentLoaderPlugin):
    file_ext = "lmdb"


    @classmethod
    def create(cls, filepath):
        """
        Return a loader instance for the given path, or None when lmdb
        support is not available.

        Args:
            filepath : path handed to the loader

        Returns:
            - loader instance when the lmdb module is available, or when it
              is missing but ``settings.LMDB_FORCE_RO`` is set

            - **None** otherwise (a warning is logged)
        """
        if lmdb is None and not settings.LMDB_FORCE_RO:
            # Logger.warn is a deprecated alias; warning() is the supported name
            _LOG.warning("LMDB module missing")
            return None
        # either lmdb imported fine, or it is missing but that is ok: since
        # everything is RO, we'll use lmdb_original anyway...
        return cls(filepath)


    def parse(self, **kwargs):
        """
        Parse the specified database and return a component definition

        Args:
            access : (optional) specifies whether to open as 'r' or 'rw';
                     when not given, 'rw' is used

        Returns:
            LmdbComponent for the origin stored in the database; its info
            gets the namednodes version, file format version and python
            version that were recorded when the file was written

        Raises:
            RuntimeError : when not running on a 64bit python
            ValueError : file does not exist, or unknown keyword arguments
            VersionError : file needs a newer namednodes, or was generated
                           by a namednodes that is too old
        """
        # Make sure we are on 64bit python
        if not (sys.maxsize > 2**32):
            raise RuntimeError("Must use 64bit python for namednodes if using lmdb")

        # NOTE(review): earlier documentation stated that read only ('r') is
        # the default, but the code has been defaulting to 'rw'; behavior is
        # kept as is - confirm which one is intended
        access = kwargs.pop("access", "rw")
        if not os.path.exists(self.filepath):
            raise ValueError("File %s not found" % self.filepath)
        # reject unknown options before the database gets opened
        if kwargs:
            raise ValueError("Unknown arguments: %s"%kwargs)
        dbmgr = DBManager.get(self.filepath, access=access)

        def _general_str(key):
            # read an entry of the general database as text; missing
            # entries are reported as "0.0.0"
            value = dbmgr.general_db.get(key, b"0.0.0")
            if not PY2 and isinstance(value, bytes):
                value = value.decode()
            return value

        python_version_str = _general_str(b"python_version_builtwith")
        # note python version string is not always a valid "strict version", and not used quite yet, so
        # no need to convert to a version type yet...

        file_version_str = _general_str(b"fileformat_version")
        # we didn't store a proper version string for version 1
        file_version = StrictVersion("1.0") if file_version_str == "1" else StrictVersion(file_version_str)

        version_required_str = _general_str(b"namednodes_version_required")
        version_required = StrictVersion(version_required_str)
        version_builtwith_str = _general_str(b"namednodes_version_builtwith")
        version_builtwith = IntelVersion(version_builtwith_str)

        if version_required > IntelVersion(str(namednodes.__version__)):
            raise VersionError("Generated with a newer namednodes (%s)\n"
                               "   upgrade namednodes to open this file"%version_required)

        # the last compatibility break was:
        minimum_supported = IntelVersion("0.13.0a1")
        if version_builtwith < minimum_supported:
            raise VersionError("\n\t\tGenerated with to old of a version of namednodes (%s)\n"
                               "\t\t\tminimimum version is: %s please regenerate file"%
                               (version_builtwith, minimum_supported))

        # version one had compression by default...
        if file_version == IntelVersion("1.0"):
            dbmgr.close()
            dbmgr = DBManager.get(self.filepath, access=access, marshal_type="pickle_zlib")

        origin = dbmgr.components_db[b'origin']
        newcomp = LmdbComponent(origin, dbmgr, None )
        newcomp.info['namednodes_version_builtwith'] = version_builtwith_str
        # report the file format version here (this used to repeat the
        # built-with version)
        newcomp.info['namednodes_lmdb_fileformat'] = file_version_str
        newcomp.info['python_version_builtwith'] = python_version_str
        return newcomp

# checks for <something>.lmdb/data.mdb to see if it is an lmdb path that was passed in
class LmdbLoaderMdb(LmdbLoader):
    """Loader that accepts the path of a ``.mdb`` file located directly
    inside a ``.lmdb`` directory and loads that directory."""
    file_ext = "mdb"

    @classmethod
    def create(cls, filepath):
        """
        Check whether ``filepath`` names a ``.mdb`` file whose directory
        ends with ``.lmdb``.

        Returns:
            loader instance created for the containing directory, or None
            when the path does not match
        """
        if not filepath.endswith(".mdb"):
            return None
        containing_dir = os.path.dirname(filepath)
        if not containing_dir.endswith(".lmdb"):
            return None
        return cls(containing_dir)
