Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added comments to methods implementation #41

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 38 additions & 4 deletions src/foapy/alphabet.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,20 +43,54 @@ def alphabet(X) -> np.ndarray:
>>> result
Exception
"""
# ex.:
# data = ['a', 'c', 'c', 'e', 'd', 'a']
data = np.asanyarray(X)
if data.ndim > 1: # Checking for d1 array
raise Not1DArrayException(
{"message": f"Incorrect array form. Expected d1 array, exists {data.ndim}"}
)

# Array of indices that sort elements in ascending order
# ex.:
# a a c c d e
# perm = [0, 5, 1, 2, 4, 3]
perm = data.argsort(kind="mergesort")

mask_shape = data.shape
unique_mask = np.empty(mask_shape, dtype=bool)
# Create mask array to store True on positions where new value appears for the first
# time in the sorted array to distinguish where subarray of one element ends and
# another begins
# ex.:
# a a c c d e
# perm = [0, 5, 1, 2, 4, 3]
# perm[1:] = [ 5, 1, 2, 4, 3]
# perm[:-1] = [0, 5, 1, 2, 4 ]

# data[perm[1:]] = [ 'a', 'c', 'c', 'd', 'e']
# data[perm[:-1]] = [ 'a', 'a', 'c', 'c', 'd']
# data[perm[1:]] != data[perm[:-1]] = [ False, True, False, True, True]
# unique_mask = [True, False, True, False, True, True]
# a a c c d e
unique_mask = np.empty(data.shape, dtype=bool)
# First element is always new
unique_mask[:1] = True
# Set true on positions where value differs from previous
unique_mask[1:] = data[perm[1:]] != data[perm[:-1]]

# Create mask array to store True on positions of the data array
# where new value appears for the first time
# ex.:
# a a c c d e
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# a a c c d e
# sorted data array a a c c d e

# unique_mask = [True, False, True, False, True, True]
# perm = [ 0, 5, 1, 2, 4, 3]
# perm[unique_mask] = [ 0, 1, 4, 3]
# result_mask = [True, True, False, True, True, False]
# a c c e d a
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would have moved this one line higher and added that this is "data" array

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The difference is that the top line shows the data in sorted order, while the bottom line shows the data in its original order.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# a c c e d a
# original data array a c c e d a

result_mask = np.full_like(unique_mask, False)
result_mask[:1] = True
result_mask[perm[unique_mask]] = True

# Return array of first occurrences of elements in the data array
# ex.:
# data = [ 'a', 'c', 'c', 'e', 'd', 'a' ]
# result_mask = [True, True, False, True, True, False]
# data[result_mask] = [ 'a', 'c', 'e', 'd' ]
return data[result_mask]
204 changes: 203 additions & 1 deletion src/foapy/intervals.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,52 +61,254 @@ def intervals(X, bind, mod):
raise ValueError(
{"message": "Invalid mode value. Use mode.lossy,normal,cycle or redundant."}
)

# ex.:
# ar = ['a', 'c', 'c', 'e', 'd', 'a']
ar = np.asanyarray(X)

if ar.shape == (0,):
return []

if bind == binding.end:
# For binding to the end, we need to reverse the array
# ar = ['a', 'd', 'e', 'c', 'c', 'a']
ar = ar[::-1]

# Array of indices that sort elements in ascending order
# ex.:
# a a c c d e
# perm = [0, 5, 1, 2, 4, 3]
perm = ar.argsort(kind="mergesort")

# Create mask array to store True on positions where new value appears for the first
# time in the sorted array to distinguish where subarray of one element ends and
# another begins.
#
# The mask is one element longer than the source array, so that it can
# serve as both the first occurrence marker and the last occurrence
# marker, depending on which end of the mask is sliced off
#
# ex.:
# a a c c d e
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# a a c c d e
# sorted data array a a c c d e

# perm = [0, 5, 1, 2, 4, 3]
# perm[1:] = [ 5, 1, 2, 4, 3]
# perm[:-1] = [0, 5, 1, 2, 4 ]

# data[perm[1:]] = [ 'a', 'c', 'c', 'd', 'e' ]
# data[perm[:-1]] = [ 'a', 'a', 'c', 'c', 'd' ]
# data[perm[1:]] != data[perm[:-1]] = [ False, True, False, True, True ]
# mask = [True, False, True, False, True, True, True]
# First appears a a c c d e
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# First appears a a c c d e
# First occurrence a a c c d e

# Last appears a a c c d e
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# Last appears a a c c d e
# Last occurrence a a c c d e


mask_shape = ar.shape
mask = np.empty(mask_shape[0] + 1, dtype=bool)
mask[:1] = True
mask[1:-1] = ar[perm[1:]] != ar[perm[:-1]]
mask[-1:] = True # or mask[-1] = True

# Create masks of first and last occurrences of elements by
# excluding the last and the first element of mask respectively
# ex.:
#
# mask = [True, False, True, False, True, True, True]
# first_mask = [True, False, True, False, True, True ]
# a a c c d e
# last_mask = [ False, True, False, True, True, True]
# a a c c d e
first_mask = mask[:-1]
last_mask = mask[1:]

# Create tmp array to count intervals
intervals = np.empty(ar.shape, dtype=np.intp)

# Count intervals between elements.
# Intervals of first elements appears would be wrong on that stage.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# Intervals of first elements appears would be wrong on that stage.
# Intervals of the first occurrence of all elements would be wrong on that stage.

# We will fix that later.
# ex.:
# a a c c d e
# perm = [0, 5, 1, 2, 4, 3]
# perm[1:] = [ 5, 1, 2, 4, 3]
# perm[:-1] = [ 0, 5, 1, 2, 4]
# perm[1:] - perm[:-1] = [ 5, -4, 1, 2, -1]
# intervals = [0, 5, -4, 1, 2, -1]
# ^ ^ ^ - wrong intervals
intervals[1:] = perm[1:] - perm[:-1]

# Fix first and last intervals
# For any mode except cycle delta would be 1
# For cycle mode delta would be an array

# ex.:
# len(ar) = 6
# a a c c d e
# perm = [ 0, 5, 1, 2, 4, 3]
# last_mask = [False, True, False, True, True, True]
# perm[last_mask] = [ 5, 2, 4, 3]
# len(ar) - perm[last_mask] = [ 1, 4, 2, 3]
# delta = [ 1, 4, 2, 3]
# a c d e
delta = len(ar) - perm[last_mask] if mod == mode.cycle else 1

# ex.:
# a a c c d e
# perm = [ 0, 5, 1, 2, 4, 3]
# first_mask = [True, False, True, False, True, True]
# perm[first_mask] = [ 0, 1, 4, 3]
# a c d e
# For all modes except cycle
# a a c c d e
# intervals = [ 0, 5, -4, 1, 2, -1]
# perm[first_mask] + delta = [ 1, 2, 5, 4]
# first_mask = [True, False, True, False, True, True]
# intervals = [ 1, 5, 2, 1, 5, 4]
# a a c c d e

# For cycle mode
# a a c c d e
# intervals = [ 0, 5, -4, 1, 2, -1]
# first_mask = [True, False, True, False, True, True]
# perm[first_mask] = [ 0, 1, 4, 3]
# delta = [ 1, 4, 2, 3]
# perm[first_mask] + delta = [ 1, 5, 6, 6]
# intervals = [ 1, 5, 5, 1, 6, 6]
# a a c c d e
intervals[first_mask] = perm[first_mask] + delta

# Create inverse permutation array
inverse_perm = np.empty(ar.shape, dtype=np.intp)
# ex.:
# a a c c d e
# perm = [0, 5, 1, 2, 4, 3]
# np.arange(ar.shape[0]) = [0, 1, 2, 3, 4, 5]
# inverse_perm = [0, 2, 3, 5, 4, 1]
# a c c e d a
inverse_perm[perm] = np.arange(ar.shape[0])

# Create result array depending on mode
if mod == mode.lossy:
# For lossy mode we ignore intervals for a first appearance of the element
# ex.:
# a a c c d e
# intervals = [ 1, 5, 2, 1, 5, 4]
# first_mask = [True, False, True, False, True, True]
# intervals = [ 0, 5, 0, 1, 0, 0]
# a a c c d e
intervals[first_mask] = 0

# Permute intervals array to the original order
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# Permute intervals array to the original order
# Permute intervals array to the original arrangement

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For clarity it is better to use "arrangement" here.

# ex.:
# a a c c d e
# intervals = [0, 5, 0, 1, 0, 0]
# inverse_perm = [0, 2, 3, 5, 4, 1]
# intervals = [0, 0, 1, 0, 0, 5]
# a c c e d a
intervals = intervals[inverse_perm]

# Remove zeros from the array
# ex.:
# a c c e d a
# intervals = [0, 0, 1, 0, 0, 5]
# intervals[intervals != 0] = [ 1, 5]
# result = [ 1, 5]
# c a
result = intervals[intervals != 0]
elif mod == mode.normal:
# For normal mode we permute intervals array to the original order
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# For normal mode we permute intervals array to the original order
# For normal mode we permute intervals array to the original arrangement

# ex.:
# a a c c d e
# intervals = [1, 5, 2, 1, 5, 4]
# inverse_perm = [0, 2, 3, 5, 4, 1]
# intervals[inverse_perm] = [1, 2, 1, 4, 5, 5]
# a c c e d a
# result = [1, 2, 1, 4, 5, 5]
result = intervals[inverse_perm]
elif mod == mode.cycle:
# For cycle mode we permute intervals array to the original order
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# For cycle mode we permute intervals array to the original order
# For cycle mode we permute intervals array to the original arrangement

# ex.:
# a a c c d e
# intervals = [1, 5, 5, 1, 6, 6]
# inverse_perm = [0, 2, 3, 5, 4, 1]
# intervals[inverse_perm] = [1, 5, 1, 6, 6, 5]
# a c c e d a
# result = [1, 5, 1, 6, 6, 5]
result = intervals[inverse_perm]
elif mod == mode.redundant:
# For redundant mode we need to count intervals for the first and last
# appearance of an element

# ex.:
# a a c c d e
# intervals = [1, 5, 2, 1, 5, 4]
# inverse_perm = [0, 2, 3, 5, 4, 1]
# intervals[inverse_perm] = [1, 2, 1, 4, 5, 5]
# a c c e d a
# result = [1, 2, 1, 4, 5, 5]

# Create 2-dimensional array of shape (len(ar), 2), shown transposed in the examples below
# Zero row is for intervals the first appearance of the element and intervals
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# Zero row is for intervals the first appearance of the element and intervals
# Zero row is for the intervals of the first appearance of the element and intervals

# for intermediate appearances
# First row will store intervals for the last appearance of the element
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# First row will store intervals for the last appearance of the element
# First row will store only intervals for the last appearance of the elements

result = np.zeros(shape=ar.shape + (2,), dtype=int)

# ex.:
# a a c c d e
# intervals = [1, 5, 2, 1, 5, 4]
# result = [
# [1, 5, 2, 1, 5, 4]
# [0, 0, 0, 0, 0, 0]
# ]
result[:, 0] = intervals

# Set intervals for the last appearance of the element to the first row

# ex.:
# a a c c d e
# perm = [ 0, 5, 1, 2, 4, 3]
# last_mask = [False, True, False, True, True, True]
# perm[last_mask] = [ 5, 2, 4, 3]
# len(ar) - perm[last_mask] = [ 1, 4, 2, 3]
# result = [
# [ 1, 5, 2, 1, 5, 4]
# [ 0, 1, 0, 4, 2, 3]
# ]
result[last_mask, 1] = len(ar) - perm[last_mask]

# Permute intervals array to the original order
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# Permute intervals array to the original order
# Permute intervals array to the original arrangement

# ex.:
# a a c c d e
# result = [
# [1, 5, 2, 1, 5, 4]
# [0, 1, 0, 4, 2, 3]
# ]
# inverse_perm = [0, 2, 3, 5, 4, 1]
# result[inverse_perm] = [
# [1, 2, 1, 4, 5, 5]
# [0, 0, 4, 3, 2, 1]
# ]
# a c c e d a
result = result[inverse_perm]

# Flatten result array
# ex.:
# a c c e d a
# result[inverse_perm] = [
# [1, 2, 1, 4, 5, 5]
# [0, 0, 4, 3, 2, 1]
# ]
# result.ravel() = [ 1, 0, 2, 0, 1, 4, 4, 3, 5, 2, 5, 1]
# | a | c | c | e | d | a |
result = result.ravel()

# Exclude zeros from the result
# result = [ 1, 0, 2, 0, 1, 4, 4, 3, 5, 2, 5, 1]
# | a | c | c | e | d | a |

# result[result != 0] = [ 1, 2, 1, 4, 4, 3, 5, 2, 5, 1]
# |a |c | c | e | d | a |
result = result[result != 0]

if bind == binding.end:
# For binding to the end, we need to reverse the result
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# For binding to the end, we need to reverse the result
# For binding to the end, we need to reverse the result back

result = result[::-1]

return result
Loading
Loading