Module cevast.dataset.managers.manager
This module contains DatasetManager interface.
Expand source code
"""This module contains DatasetManager interface."""
from typing import Tuple, Union, Type
from datetime import datetime
from abc import ABC, abstractmethod, abstractclassmethod
from enum import IntEnum
from cevast.dataset.dataset import DatasetSource, Dataset
from cevast.certdb import CertDB
from cevast.analysis import CertAnalyser
class DatasetManagerTask(IntEnum):
"""Enumeration of DatasetManager Tasks"""
COLLECT = 1
FILTER = 2
UNIFY = 3
ANALYSE = 4
@classmethod
def validate(cls, state: Union['DatasetManagerTask', str]) -> bool:
"""Validate DatasetManagerTask."""
if isinstance(state, cls):
return state in cls
if isinstance(state, str):
return state in cls.__members__
return False
def __str__(self):
return str(self.name)
class DatasetManager(ABC):
"""
An abstract DatasetManager class representing an interface that can be used to perform
various tasks with a certificate dataset.
For Manager to perform a task, a repository path and date must be provided. Date works
as an identifier of the dataset even though the date don't need to match exactly
- the newest dataset by that date is identified. Additionally a port number might be
used to more specify the dataset.
DatasetManager offers performing tasks independently or running a series of tasks at once
by `run` method (usefull for performing tasks that would be rather complex and/or long-lasting
running separatelly). Running a series might also be more optimized.
"""
@property
@abstractclassmethod
def dataset_source(cls) -> DatasetSource:
"""
Dataset source property used to identify a manager specification.
"""
@abstractmethod
def __init__(self, repository: str, date: datetime.date = datetime.today().date(),
ports: Tuple[str] = ('443',), cpu_cores: int = 1):
"""
Initialize Manager.
`repository` is dataset repository,
'date' is date,
'ports' is list of ports more specifying datasets,
'cpu_cores' is maximum number of CPU cores that might be used.
"""
@abstractmethod
def run(self, task_pipline: Tuple[Tuple[DatasetManagerTask, dict]]) -> None:
"""
Run a series of tasks.
`task_pipline` is tuple composed of the required tasks in form of pairs ('task', 'cfg'), where:
- 'task' is supported DatasetManagerTask,
- 'cfg' is dictionary filled of parameters that will be passed to individual task methods.
Caller function must ensure that 'cfg' parameters match task method's declaration.
TODO make cfg dict optional
"""
@abstractmethod
def collect(self, api_key: str = None) -> Tuple[Dataset]:
"""
Collect a dataset.
`api_key` is API access key that might be needed to retrieve datasets (depends on source implementation).
Return tuple of collected Datasets.
"""
@abstractmethod
def filter(self, methods: list = None) -> Tuple[Dataset]:
"""
Filter a dataset with given methods.
Return tuple of filtered Datasets.
"""
@abstractmethod
def unify(self, certdb: CertDB) -> Tuple[Dataset]:
"""
Unify a dataset.
`certdb` is CertDB instance to work with (to insert parsed certificates to).
Return tuple of unified Datasets.
"""
@abstractmethod
def analyse(self, analyser: Type[CertAnalyser], analyser_cfg: dict) -> Tuple[Dataset]:
"""
Analyse a dataset with given analyser.
`analyser` is a CertAnalyser class,
`analyser_cfg` is a dictionary with analyser paramaters that will be passed to the initializer.
Call to analyser is performed like this: analyser(cert_chain, **analyser_cfg).
Return tuple of analysed Datasets.
"""
Classes
class DatasetManager (repository: str, date:
= datetime.date(2021, 5, 4), ports: Tuple[str] = ('443',), cpu_cores: int = 1) -
An abstract DatasetManager class representing an interface that can be used to perform various tasks with a certificate dataset.
For Manager to perform a task, a repository path and date must be provided. Date works as an identifier of the dataset even though the date don't need to match exactly - the newest dataset by that date is identified. Additionally a port number might be used to more specify the dataset.
DatasetManager offers performing tasks independently or running a series of tasks at once by
run
method (usefull for performing tasks that would be rather complex and/or long-lasting running separatelly). Running a series might also be more optimized.Initialize Manager.
repository
is dataset repository, 'date' is date, 'ports' is list of ports more specifying datasets, 'cpu_cores' is maximum number of CPU cores that might be used.Expand source code
class DatasetManager(ABC): """ An abstract DatasetManager class representing an interface that can be used to perform various tasks with a certificate dataset. For Manager to perform a task, a repository path and date must be provided. Date works as an identifier of the dataset even though the date don't need to match exactly - the newest dataset by that date is identified. Additionally a port number might be used to more specify the dataset. DatasetManager offers performing tasks independently or running a series of tasks at once by `run` method (usefull for performing tasks that would be rather complex and/or long-lasting running separatelly). Running a series might also be more optimized. """ @property @abstractclassmethod def dataset_source(cls) -> DatasetSource: """ Dataset source property used to identify a manager specification. """ @abstractmethod def __init__(self, repository: str, date: datetime.date = datetime.today().date(), ports: Tuple[str] = ('443',), cpu_cores: int = 1): """ Initialize Manager. `repository` is dataset repository, 'date' is date, 'ports' is list of ports more specifying datasets, 'cpu_cores' is maximum number of CPU cores that might be used. """ @abstractmethod def run(self, task_pipline: Tuple[Tuple[DatasetManagerTask, dict]]) -> None: """ Run a series of tasks. `task_pipline` is tuple composed of the required tasks in form of pairs ('task', 'cfg'), where: - 'task' is supported DatasetManagerTask, - 'cfg' is dictionary filled of parameters that will be passed to individual task methods. Caller function must ensure that 'cfg' parameters match task method's declaration. TODO make cfg dict optional """ @abstractmethod def collect(self, api_key: str = None) -> Tuple[Dataset]: """ Collect a dataset. `api_key` is API access key that might be needed to retrieve datasets (depends on source implementation). Return tuple of collected Datasets. """ @abstractmethod def filter(self, methods: list = None) -> Tuple[Dataset]: """ Filter a dataset with given methods. Return tuple of filtered Datasets. """ @abstractmethod def unify(self, certdb: CertDB) -> Tuple[Dataset]: """ Unify a dataset. `certdb` is CertDB instance to work with (to insert parsed certificates to). Return tuple of unified Datasets. """ @abstractmethod def analyse(self, analyser: Type[CertAnalyser], analyser_cfg: dict) -> Tuple[Dataset]: """ Analyse a dataset with given analyser. `analyser` is a CertAnalyser class, `analyser_cfg` is a dictionary with analyser paramaters that will be passed to the initializer. Call to analyser is performed like this: analyser(cert_chain, **analyser_cfg). Return tuple of analysed Datasets. """
Ancestors
- abc.ABC
Subclasses
Instance variables
var dataset_source
-
A decorator indicating abstract classmethods.
Similar to abstractmethod.
Usage
class C(metaclass=ABCMeta): @abstractclassmethod def my_abstract_classmethod(cls, …): …
'abstractclassmethod' is deprecated. Use 'classmethod' with 'abstractmethod' instead.
Methods
def analyse(self, analyser: Type[CertAnalyser], analyser_cfg: dict) ‑> Tuple[Dataset]
-
Analyse a dataset with given analyser.
analyser
is a CertAnalyser class,analyser_cfg
is a dictionary with analyser paramaters that will be passed to the initializer.Call to analyser is performed like this: analyser(cert_chain, **analyser_cfg).
Return tuple of analysed Datasets.
Expand source code
@abstractmethod def analyse(self, analyser: Type[CertAnalyser], analyser_cfg: dict) -> Tuple[Dataset]: """ Analyse a dataset with given analyser. `analyser` is a CertAnalyser class, `analyser_cfg` is a dictionary with analyser paramaters that will be passed to the initializer. Call to analyser is performed like this: analyser(cert_chain, **analyser_cfg). Return tuple of analysed Datasets. """
def collect(self, api_key: str = None) ‑> Tuple[Dataset]
-
Collect a dataset.
api_key
is API access key that might be needed to retrieve datasets (depends on source implementation). Return tuple of collected Datasets.Expand source code
@abstractmethod def collect(self, api_key: str = None) -> Tuple[Dataset]: """ Collect a dataset. `api_key` is API access key that might be needed to retrieve datasets (depends on source implementation). Return tuple of collected Datasets. """
def filter(self, methods: list = None) ‑> Tuple[Dataset]
-
Filter a dataset with given methods. Return tuple of filtered Datasets.
Expand source code
@abstractmethod def filter(self, methods: list = None) -> Tuple[Dataset]: """ Filter a dataset with given methods. Return tuple of filtered Datasets. """
def run(self, task_pipline: Tuple[Tuple[DatasetManagerTask, dict]]) ‑> NoneType
-
Run a series of tasks.
task_pipline
is tuple composed of the required tasks in form of pairs ('task', 'cfg'), where: - 'task' is supported DatasetManagerTask, - 'cfg' is dictionary filled of parameters that will be passed to individual task methods. Caller function must ensure that 'cfg' parameters match task method's declaration. TODO make cfg dict optionalExpand source code
@abstractmethod def run(self, task_pipline: Tuple[Tuple[DatasetManagerTask, dict]]) -> None: """ Run a series of tasks. `task_pipline` is tuple composed of the required tasks in form of pairs ('task', 'cfg'), where: - 'task' is supported DatasetManagerTask, - 'cfg' is dictionary filled of parameters that will be passed to individual task methods. Caller function must ensure that 'cfg' parameters match task method's declaration. TODO make cfg dict optional """
def unify(self, certdb: CertDB) ‑> Tuple[Dataset]
-
Unify a dataset.
certdb
is CertDB instance to work with (to insert parsed certificates to). Return tuple of unified Datasets.Expand source code
@abstractmethod def unify(self, certdb: CertDB) -> Tuple[Dataset]: """ Unify a dataset. `certdb` is CertDB instance to work with (to insert parsed certificates to). Return tuple of unified Datasets. """
class DatasetManagerTask (value, names=None, *, module=None, qualname=None, type=None, start=1)
-
Enumeration of DatasetManager Tasks
Expand source code
class DatasetManagerTask(IntEnum): """Enumeration of DatasetManager Tasks""" COLLECT = 1 FILTER = 2 UNIFY = 3 ANALYSE = 4 @classmethod def validate(cls, state: Union['DatasetManagerTask', str]) -> bool: """Validate DatasetManagerTask.""" if isinstance(state, cls): return state in cls if isinstance(state, str): return state in cls.__members__ return False def __str__(self): return str(self.name)
Ancestors
- enum.IntEnum
- builtins.int
- enum.Enum
Class variables
var ANALYSE
var COLLECT
var FILTER
var UNIFY
Static methods
def validate(state: Union[_ForwardRef('DatasetManagerTask'), str]) ‑> bool
-
Validate DatasetManagerTask.
Expand source code
@classmethod def validate(cls, state: Union['DatasetManagerTask', str]) -> bool: """Validate DatasetManagerTask.""" if isinstance(state, cls): return state in cls if isinstance(state, str): return state in cls.__members__ return False