Skip to content

Commit

Permalink
fix formatting errors
Browse files (browse the repository at this point in the history)
  • Loading branch information
scottgigante committed Jan 22, 2018
1 parent 6b8e950 commit c3d7446
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 7 deletions.
2 changes: 1 addition & 1 deletion README.rst
Original file line number | Diff line number | Diff line change
Expand Up @@ -51,7 +51,7 @@ You can also get the associated minimum-cost flow:
>>> emd_with_flow(first_histogram, second_histogram, distance_matrix)
(3.5, [[0.0, 0.0], [0.0, 1.0]])
- You can also calculate the EMD directly from two arrays:
+ You can also calculate the EMD directly from two arrays of observations:

.. code:: python
Expand Down
2 changes: 1 addition & 1 deletion pyemd/__init__.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -33,7 +33,7 @@
>>> emd_with_flow(first_signature, second_signature, distance_matrix)
(3.5, [[0.0, 0.0], [0.0, 1.0]])
- You can also calculate the EMD directly from two arrays:
+ You can also calculate the EMD directly from two arrays of observations:
>>> from pyemd import emd_samples
>>> first_array = [1,2,3,4]
Expand Down
12 changes: 7 additions & 5 deletions pyemd/emd.pyx
Original file line number | Diff line number | Diff line change
Expand Up @@ -143,16 +143,17 @@ def emd_samples(first_array,
range = (min(np.min(first_array), np.min(second_array)),
max(np.max(first_array), np.max(second_array)))

if type(bins) == str:
if isinstance(bins, str):
hist, _ = np.histogram(np.concatenate([first_array,
second_array]),
range=range, bins=bins)
range=range,
bins=bins)
bins = len(hist)

if distance == 'euclidean':
distance = euclidean_pairwise_distance

# compute histograms
# Compute histograms
first_histogram, bin_edges = np.histogram(first_array,
range=range,
bins=bins)
Expand All @@ -161,14 +162,14 @@ def emd_samples(first_array,
bins=bins)

if normalized:
# normalize histograms to represent fraction of dataset in each bin
# Normalize histograms to represent fraction of dataset in each bin
first_histogram = first_histogram/np.sum(first_histogram)
second_histogram = second_histogram/np.sum(second_histogram)
else:
first_histogram = first_histogram.astype(np.float64)
second_histogram = second_histogram.astype(np.float64)

# compute the distance matrix between the center of each bin
# Compute the distance matrix between the center of each bin
bin_locations = np.mean([bin_edges[:-1], bin_edges[1:]], axis=0)
distance_matrix = distance(bin_locations)

Expand All @@ -177,6 +178,7 @@ def emd_samples(first_array,
distance_matrix,
extra_mass_penalty=extra_mass_penalty)


def emd_with_flow(np.ndarray[np.float64_t, ndim=1, mode="c"] first_histogram,
np.ndarray[np.float64_t, ndim=1, mode="c"] second_histogram,
np.ndarray[np.float64_t, ndim=2, mode="c"] distance_matrix,
Expand Down

0 comments on commit c3d7446

Please sign in to comment.