-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Feature] Módulo Gerador de Instâncias (#24)
Merge pull request #24 from SteffanoP/feature/instances-generator
- Loading branch information
Showing
8 changed files
with
309 additions
and
109 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
from pandas import DataFrame | ||
import instances_generator.maker as maker | ||
|
||
class InstancesGenerator:
    """
    Generate data sets of instances from samples, attributes and classes.

    The generator stores the requested properties once and dispatches to one
    of the functions of the ``maker`` module, selected by an integer code.

    Attributes
    ----------
    types_generator : dict
        Maps the integer code accepted by ``generate`` to the name of the
        private method that builds the corresponding data set.

    Methods
    -------
    generate(type_gen):
        Generate a DataFrame of instances built by the maker selected by
        ``type_gen``.
    """

    types_generator = {
        1: '_blobs',
        2: '_moons',
        3: '_circles',
        4: '_classf',
        5: '_mlabel_classf'
    }

    def __init__(self, options: dict):
        """
        Construct the generator from the desired properties.

        Parameters
        ----------
        options : dict
            Properties of the data set to generate. The keys ``'samples'``,
            ``'attributes'``, ``'classes'`` and ``'maker'`` are required;
            ``options['maker'][1]`` is kept as the maker-specific extra
            option (centers, noise or labels, depending on the maker).

        Raises
        ------
        KeyError
            If one of the required keys is missing from ``options``.
        """
        self._samples = options['samples']
        self._attributes = options['attributes']
        self._classes = options['classes']
        # NOTE(review): only index 1 of options['maker'] is read here;
        # index 0 presumably carries the maker type - confirm with callers.
        self._optional_option = options['maker'][1]

    def generate(self, type_gen: int) -> DataFrame:
        """
        Generate a DataFrame of instances built by the selected maker.

        Parameters
        ----------
        type_gen : int, required
            Code of the maker to use; must be a key of ``types_generator``.

        Returns
        -------
        pandas.DataFrame

        Raises
        ------
        ValueError
            If ``type_gen`` is not a known maker code.
        """
        method_name = self.types_generator.get(type_gen)
        if method_name is None:
            # Without this guard getattr(self, None) fails with an
            # unhelpful "attribute name must be string" TypeError.
            raise ValueError(
                'Unknown generator type {!r}; expected one of {}'.format(
                    type_gen, sorted(self.types_generator)))
        return getattr(self, method_name)()

    def _blobs(self) -> DataFrame:
        """Build a blobs data set; the extra option is used as centers."""
        centers = self._optional_option
        return maker.blobs(self._samples, centers, self._attributes)

    def _moons(self) -> DataFrame:
        """Build a moons data set; the extra option is used as noise."""
        noise = self._optional_option
        return maker.moons(self._samples, noise)

    def _circles(self) -> DataFrame:
        """Build a circles data set; the extra option is used as noise."""
        noise = self._optional_option
        return maker.circles(self._samples, noise)

    def _classf(self) -> DataFrame:
        """Build a classification data set (the extra option is unused)."""
        return maker.classification(self._samples, self._attributes,
                                    self._classes)

    def _mlabel_classf(self) -> DataFrame:
        """Build a multilabel data set; the extra option is used as labels."""
        labels = self._optional_option
        return maker.multilabel_classification(self._samples,
                                               self._attributes,
                                               self._classes, labels)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,126 @@ | ||
import pandas as pd | ||
import numpy as np | ||
|
||
# Supported dataset generators | ||
from sklearn.datasets import make_blobs | ||
from sklearn.datasets import make_moons | ||
from sklearn.datasets import make_circles | ||
from sklearn.datasets import make_classification | ||
from sklearn.datasets import make_multilabel_classification as make_mlabel_classification | ||
|
||
# Seed drawn once when the module is imported and passed to every
# make_multilabel_classification call, so repeated calls within one process
# share the same distributions. The value is not fixed across runs.
RANDOM_SEED = np.random.randint(0, 1 << 10)
|
||
def blobs(samples, centers, features):
    """
    Generate isotropic Gaussian blobs for clustering.

    Thin wrapper around ``sklearn.datasets.make_blobs`` that exposes only
    three of its parameters.
    See more at <https://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_blobs.html>

    Parameters
    ----------
    samples : int
        The total number of points equally divided among clusters.
    centers : int
        The number of centers to generate, or the fixed center locations.
    features : int
        The number of features for each sample.

    Returns
    -------
    DataFrame : pandas.DataFrame
        One row per generated sample: the feature columns followed by a
        ``label`` column.
    """
    points, cluster_ids = make_blobs(
        n_samples=samples,
        n_features=features,
        centers=centers,
    )
    return _create_pd_dataframe(points, cluster_ids)
|
||
def moons(samples, noise):
    """
    Make two interleaving half circles.

    Thin wrapper around ``sklearn.datasets.make_moons``.
    See more at <https://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_moons.html>

    Parameters
    ----------
    samples : int
        The total number of points generated.
    noise : float
        Standard deviation of Gaussian noise added to the data.

    Returns
    -------
    DataFrame : pandas.DataFrame
        One row per generated sample: the feature columns followed by a
        ``label`` column.
    """
    points, half_ids = make_moons(
        n_samples=samples,
        noise=noise,
    )
    return _create_pd_dataframe(points, half_ids)
|
||
def circles(samples, noise):
    """
    Make a large circle containing a smaller circle in 2d.

    Thin wrapper around ``sklearn.datasets.make_circles``.
    See more at <https://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_circles.html>

    Parameters
    ----------
    samples : int
        The total number of points generated.
    noise : float
        Standard deviation of Gaussian noise added to the data.

    Returns
    -------
    DataFrame : pandas.DataFrame
        One row per generated sample: the feature columns followed by a
        ``label`` column.
    """
    points, circle_ids = make_circles(
        n_samples=samples,
        noise=noise,
    )
    return _create_pd_dataframe(points, circle_ids)
|
||
def classification(samples, features, classes):
    """
    Generate a random n-class classification problem.

    Thin wrapper around ``sklearn.datasets.make_classification`` with no
    redundant features and a single cluster per class.
    See more at <https://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_classification.html>

    Parameters
    ----------
    samples : int
        The total number of points.
    features : int
        The total number of features for each sample. It must be at least
        the number of informative features (2, or more when ``classes`` is
        greater than 4).
    classes : int
        The number of classes (labels) of the classification problem.

    Returns
    -------
    DataFrame : pandas.DataFrame
        One row per generated sample: the feature columns followed by a
        ``label`` column.
    """
    # make_classification rejects n_informative < log2(n_classes *
    # n_clusters_per_class). Keep the historical value of 2 whenever it is
    # enough, and only grow it when more than 4 classes are requested
    # (those requests used to fail with a ValueError).
    informative = 2
    if classes > 4:
        # (classes - 1).bit_length() == ceil(log2(classes)) for classes >= 1
        informative = max(informative, (int(classes) - 1).bit_length())

    X, y = make_classification(
        n_samples=samples,
        n_features=features,
        n_classes=classes,
        n_redundant=0,
        n_informative=informative,
        n_clusters_per_class=1
    )
    return _create_pd_dataframe(X, y)
|
||
def multilabel_classification(samples, features, classes, labels):
    """
    Generate a random multilabel classification problem.

    Thin wrapper around ``sklearn.datasets.make_multilabel_classification``.
    Unlabeled samples are disallowed and the module-level ``RANDOM_SEED`` is
    used as random state.
    See more at <https://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_multilabel_classification.html>

    Parameters
    ----------
    samples : int
        The total number of points.
    features : int
        The total number of features for each sample.
    classes : int
        The number of classes of the classification problem.
    labels : int
        The average number of labels per instance.

    Returns
    -------
    DataFrame : pandas.DataFrame
        The generated samples together with their labels, as assembled by
        ``_create_pd_dataframe``.
    """
    # NOTE(review): per the scikit-learn docs the second return value is a
    # (n_samples, n_classes) indicator matrix, not a 1-D vector - confirm
    # that _create_pd_dataframe copes with a multi-column label.
    feature_matrix, label_matrix = make_mlabel_classification(
        n_samples=samples, n_features=features, n_classes=classes,
        n_labels=labels, allow_unlabeled=False, random_state=RANDOM_SEED)
    return _create_pd_dataframe(feature_matrix, label_matrix)
|
||
def _create_pd_dataframe(samples, label):
    """
    Assemble generated samples and their labels into a single DataFrame.

    Parameters
    ----------
    samples : array-like
        The generated samples, one row per sample and one column per
        feature.
    label : array-like
        The labels of the samples. A 1-D (or single-column) label is stored
        in a ``label`` column. A 2-D label with several columns, such as a
        multilabel indicator matrix, is stored as one column per label,
        named ``label_0``, ``label_1``, ...

    Returns
    -------
    DataFrame : pandas.DataFrame
        The feature columns followed by the label column(s).
    """
    df = pd.DataFrame(samples)
    if getattr(label, 'ndim', 1) == 2 and label.shape[1] > 1:
        # pandas refuses to assign a multi-column 2-D array to the single
        # 'label' column (ValueError), so spread it over several columns.
        label_matrix = np.asarray(label)
        for idx in range(label_matrix.shape[1]):
            df['label_{}'.format(idx)] = label_matrix[:, idx]
    else:
        df['label'] = label
    return df
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.