"""In-memory data sets of typed key/value objects, persisted as ini files.

A data set is stored as a directory containing a ``data-set.ini`` meta file
and one sub-directory per object type; each object is a ``<key>.ini`` file
with a ``values`` section.
"""

from typing import Optional, Mapping, List
import configparser
import os
from pathlib import Path
from functools import total_ordering
from itertools import chain
import logging

logger = logging.getLogger(__name__)


@total_ordering
class ObjectType(object):
    """A named object type that owns the ordered list of its field names.

    Every ``Object`` of this type stores its values in a list whose positions
    match the positions in ``fields``.
    """

    def __init__(self, name: str):
        self._name = name
        self._fields = []
        self._objects = {}

    def __eq__(self, o: object) -> bool:
        return isinstance(o, ObjectType) and self._name == o._name

    def __lt__(self, o: object) -> bool:
        # Anything that is not an ObjectType is ordered after every ObjectType.
        if not isinstance(o, ObjectType):
            return True
        return self._name < o._name

    def __hash__(self) -> int:
        return hash(self._name)

    def by_key(self, key: str) -> "Object":
        """Return the object registered under *key*, creating it if missing.

        Objects created here are detached: they have no owning data set and
        therefore can never be frozen.
        """
        try:
            return self._objects[key]
        except KeyError:
            o = Object(None, self, key)
            self._objects[key] = o
            return o

    @property
    def name(self):
        return self._name

    @property
    def fields(self):
        return self._fields

    def index_of(self, field: str, create: bool = False) -> int:
        """Return the position of *field* in ``fields``.

        When the field is unknown it is appended if *create* is true;
        otherwise the ``ValueError`` from the lookup is re-raised.
        """
        try:
            return self._fields.index(field)
        except ValueError:
            if not create:
                raise
            self._fields.append(field)
            return len(self._fields) - 1


class Object(object):
    """A single keyed object holding one value per field of its type."""

    def __init__(self, ds: Optional["DataSet"], ot: ObjectType, key: str):
        self._ds = ds
        self._ot = ot
        self._key = key
        # Values indexed by the field positions of the object type; slots of
        # fields this object never set are either absent or None.
        self._data = []

    @property
    def object_type(self):
        return self._ot

    @property
    def key(self):
        return self._key

    def set(self, key: str, value: str):
        """Set field *key* to *value*, registering the field on the type.

        Raises ``Exception`` when the owning data set is frozen.
        """
        if self._ds is not None and self._ds._frozen:
            raise Exception("This data set is frozen")

        idx = self._ot.index_of(key, create=True)
        # Field indexes are shared by all objects of the type, so this object
        # may not have slots for fields that only other objects have set.
        missing = idx + 1 - len(self._data)
        if missing > 0:
            self._data.extend([None] * missing)
        self._data[idx] = value

    def _set_from_object(self, other: "Object"):
        """Copy every field that *other* has a value for into this object."""
        for k in other._ot.fields:
            value = other.get(k)
            # Unset fields must not overwrite values this object already has.
            if value is None:
                continue
            self.set(k, value)

    def get(self, key: str) -> Optional[str]:
        """Return the value of field *key*, or None if this object has none.

        Raises ``ValueError`` when *key* is not a field of the object type.
        """
        idx = self._ot.index_of(key)
        if idx >= len(self._data):
            return None
        return self._data[idx]


class DataSet(object):
    """A named collection of objects grouped by object type."""

    def __init__(self, name):
        self._name = name
        self._object_types = {}
        self._objects_by_type = {}  # type: Mapping[str, Mapping[str, Object]]
        self._frozen = False
        self._changed = False

    @property
    def name(self):
        return self._name

    def freeze(self):
        """Mark the data set as frozen; creating or setting then raises."""
        self._frozen = True

    def get_object_type(self, object_type: str) -> ObjectType:
        """Return the ObjectType named *object_type*, creating it if needed."""
        try:
            return self._object_types[object_type]
        except KeyError:
            ot = ObjectType(object_type)
            self._object_types[object_type] = ot
            self._changed = True
            return ot

    def get_object(self, object_type: str, key: str) -> Object:
        """Return the object *key* of type *object_type*, creating it if needed.

        Raises ``Exception`` when something would have to be created but the
        data set is frozen.
        """
        try:
            objects = self._objects_by_type[object_type]
        except KeyError:
            if self._frozen:
                raise Exception("This data set is frozen")
            objects = {}
            self._objects_by_type[object_type] = objects
            self._changed = True

        try:
            return objects[key]
        except KeyError:
            if self._frozen:
                raise Exception("This data set is frozen")
            ot = self.get_object_type(object_type)
            o = Object(self, ot, key)
            objects[key] = o
            self._changed = True
            return o

    def items(self):
        """Return a list with every object of every type."""
        return list(chain.from_iterable(
            objects.values() for objects in self._objects_by_type.values()))

    def merge(self, other: "DataSet") -> "DataSet":
        """Return a new data set with this set's objects overlaid by *other*'s.

        The result carries this data set's name. Neither input is modified.
        """
        ds = DataSet(self._name)
        # Copy self first so that values from `other` win on conflicts.
        for source in (self, other):
            for objects in source._objects_by_type.values():
                for o in objects.values():
                    ds.get_object(o.object_type.name, o.key)._set_from_object(o)
        return ds


class DataSetManager(object):
    """Loads and stores data sets as directories of ini files under a base dir."""

    def __init__(self, basedir: Path):
        self._basedir = Path(basedir)

    def metafile_for_ds(self, ds_name) -> Path:
        """Return the path of the ``data-set.ini`` file for *ds_name*."""
        return self._basedir / ds_name / "data-set.ini"

    def create_rw(self, name, inputs: List[str]) -> "LazyDataSet":
        """Return a context manager that merges *inputs* and stores *name*."""
        return LazyDataSet(self, name, inputs)

    def load(self, name, freeze=False) -> DataSet:
        """Load a data set from disk.

        *name* is either an absolute path or a path relative to the base
        directory. If it does not point at a directory, its parent directory
        is used instead. The data set is frozen after loading when *freeze*
        is true. Raises ``IOError`` when an ini file cannot be read.
        """
        ds_dir = Path(name) if Path(name).is_absolute() else self._basedir / name
        ds_dir = ds_dir if ds_dir.is_dir() else ds_dir.parent

        logger.info("Loading DS from '%s'", ds_dir)

        meta = self._load_ini(ds_dir / "data-set.ini")
        ds = DataSet(meta.get("data-set", "name"))

        count = 0
        for ot_path in ds_dir.glob("*"):
            if not ot_path.is_dir():
                continue

            ot = ot_path.name
            logger.info(" Loading type '%s'", ot)
            for o_path in ot_path.glob("*.ini"):
                count += 1
                key = o_path.name[:-4]  # strip the ".ini" suffix
                logger.info(" Loading key '%s'", key)
                ini = self._load_ini(o_path)
                o = ds.get_object(ot, key)
                for k, v in ini.items("values"):
                    o.set(k, v)

        # Freeze only after everything is populated; set() refuses afterwards.
        if freeze:
            ds.freeze()

        logger.info("Loaded %s items", count)
        return ds

    def store(self, ds: DataSet):
        """Write *ds* below the base directory, one ini file per object."""
        ds_dir = self._basedir / ds.name
        items = ds.items()
        logger.info("Storing DS '%s' with %s objects to %s", ds.name, len(items), ds_dir)

        os.makedirs(ds_dir, exist_ok=True)
        ini = self._blank_ini()
        ini.add_section("data-set")
        ini.set("data-set", "name", ds.name)
        self._store_ini(ini, ds_dir / "data-set.ini")

        for o in items:
            ot = o.object_type
            key = o.key

            ot_dir = ds_dir / ot.name
            os.makedirs(ot_dir, exist_ok=True)
            ini = self._blank_ini()
            ini.add_section("meta")
            ini.set("meta", "type", ot.name)

            ini.add_section("values")
            for k in ot.fields:
                v = o.get(k)
                # Fields this object never set have no value to persist, and
                # ConfigParser.set() rejects non-string values.
                if v is None:
                    continue
                ini.set("values", k, v)
            self._store_ini(ini, ot_dir / "{}.ini".format(key))

    def _blank_ini(self):
        """Return an empty ConfigParser with interpolation disabled."""
        return configparser.ConfigParser(interpolation=None)

    def _load_ini(self, path: Path):
        """Parse the ini file at *path*; raise ``IOError`` if it was not read."""
        ini = self._blank_ini()
        if len(ini.read(str(path))) != 1:
            raise IOError("Could not load ini file: {}".format(path))
        return ini

    def _store_ini(self, ini, path):
        """Write *ini* to *path*, replacing any existing file."""
        with open(path, "w") as f:
            ini.write(f)


class LazyDataSet(object):
    """Context manager that merges input data sets and stores the result.

    On entry the inputs are loaded (frozen) and merged into a new data set,
    which is returned for modification. On exit that data set is stored,
    whether or not the ``with`` body raised.
    """

    def __init__(self, dsm: DataSetManager, name, inputs):
        self._dsm = dsm
        self._name = name
        self._inputs = inputs

    def __enter__(self):
        ds = DataSet(self._name)
        for name in self._inputs:
            ds = ds.merge(self._dsm.load(name, freeze=True))
        self._ds = ds
        return self._ds

    def __exit__(self, *args):
        self._dsm.store(self._ds)
        # Never suppress an exception raised inside the with-block.
        return False