Skip to content

Commit

Permalink
Add routines for saving arrays in npy format
Browse files Browse the repository at this point in the history
  • Loading branch information
awvwgk committed Nov 27, 2021
1 parent 57c41f9 commit 6fa3355
Show file tree
Hide file tree
Showing 3 changed files with 225 additions and 0 deletions.
1 change: 1 addition & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ set(fppFiles
stdlib_bitsets_64.fypp
stdlib_bitsets_large.fypp
stdlib_io.fypp
stdlib_io_npy.fypp
stdlib_kinds.fypp
stdlib_linalg.fypp
stdlib_linalg_diag.fypp
Expand Down
5 changes: 5 additions & 0 deletions src/Makefile.manual
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ SRCFYPP = \
stdlib_bitsets_large.fypp \
stdlib_bitsets.fypp \
stdlib_io.fypp \
stdlib_io_npy.fypp \
stdlib_kinds.fypp \
stdlib_linalg.fypp \
stdlib_linalg_diag.fypp \
Expand Down Expand Up @@ -87,6 +88,10 @@ stdlib_io.o: \
stdlib_optval.o \
stdlib_kinds.o \
stdlib_ascii.o
stdlib_io_npy.o: \
stdlib_error.o \
stdlib_kinds.o \
stdlib_strings.o
stdlib_linalg.o: \
stdlib_kinds.o
stdlib_linalg_diag.o: \
Expand Down
219 changes: 219 additions & 0 deletions src/stdlib_io_npy.fypp
Original file line number Diff line number Diff line change
@@ -0,0 +1,219 @@
! SPDX-Identifer: MIT

#:include "common.fypp"
#:set RANKS = range(1, MAXRANK + 1)
#:set KINDS_TYPES = REAL_KINDS_TYPES + INT_KINDS_TYPES + CMPLX_KINDS_TYPES

!> Description of the npy format taken from
!> https://numpy.org/doc/stable/reference/generated/numpy.lib.format.html
!>
!>## Format Version 1.0
!>
!> The first 6 bytes are a magic string: exactly \x93NUMPY.
!>
!> The next 1 byte is an unsigned byte:
!> the major version number of the file format, e.g. \x01.
!>
!> The next 1 byte is an unsigned byte:
!> the minor version number of the file format, e.g. \x00.
!> Note: the version of the file format is not tied to the version of the numpy package.
!>
!> The next 2 bytes form a little-endian unsigned short int:
!> the length of the header data HEADER_LEN.
!>
!> The next HEADER_LEN bytes form the header data describing the array’s format.
!> It is an ASCII string which contains a Python literal expression of a dictionary.
!> It is terminated by a newline (\n) and padded with spaces (\x20) to make the total
!> of len(magic string) + 2 + len(length) + HEADER_LEN be evenly divisible by 64 for
!> alignment purposes.
!>
!> The dictionary contains three keys:
!>
!> - “descr”: dtype.descr
!> An object that can be passed as an argument to the numpy.dtype constructor
!> to create the array’s dtype.
!>
!> - “fortran_order”: bool
!> Whether the array data is Fortran-contiguous or not. Since Fortran-contiguous
!> arrays are a common form of non-C-contiguity, we allow them to be written directly
!> to disk for efficiency.
!>
!> - “shape”: tuple of int
!> The shape of the array.
!>
!> For repeatability and readability, the dictionary keys are sorted in alphabetic order.
!> This is for convenience only. A writer SHOULD implement this if possible. A reader MUST
!> NOT depend on this.
!>
!> Following the header comes the array data. If the dtype contains Python objects
!> (i.e. dtype.hasobject is True), then the data is a Python pickle of the array.
!> Otherwise the data is the contiguous (either C- or Fortran-, depending on fortran_order)
!> bytes of the array. Consumers can figure out the number of bytes by multiplying the
!> number of elements given by the shape (noting that shape=() means there is 1 element)
!> by dtype.itemsize.
!>
!>## Format Version 2.0
!>
!> The version 1.0 format only allowed the array header to have a total size of 65535 bytes.
!> This can be exceeded by structured arrays with a large number of columns.
!> The version 2.0 format extends the header size to 4 GiB. numpy.save will automatically
!> save in 2.0 format if the data requires it, else it will always use the more compatible
!> 1.0 format.
!>
!> The description of the fourth element of the header therefore has become:
!> “The next 4 bytes form a little-endian unsigned int: the length of the header data
!> HEADER_LEN.”
!>
!>## Format Version 3.0
!>
!> This version replaces the ASCII string (which in practice was latin1) with a
!> utf8-encoded string, so supports structured types with any unicode field names.
module stdlib_io_npy
use stdlib_error, only : error_stop
use stdlib_kinds, only : int8, int16, int32, int64, sp, dp, xdp, qp
use stdlib_strings, only : to_string
implicit none
private

public :: save_npy


!> Save multidimensional array in npy format
interface save_npy
#:for k1, t1 in KINDS_TYPES
#:for rank in RANKS
module procedure save_npy_${t1[0]}$${k1}$_${rank}$
#:endfor
#:endfor
end interface save_npy


character(len=*), parameter :: nl = char(10)

character(len=*), parameter :: &
type_iint8 = "<i1", type_iint16 = "<i2", type_iint32 = "<i4", type_iint64 = "<i8", &
type_rsp = "<f4", type_rdp = "<f8", type_rxdp = "<f10", type_rqp = "<f16", &
type_csp = "<c8", type_cdp = "<c16", type_cxdp = "<c20", type_cqp = "<c32"

character(len=*), parameter :: &
& magic_number = char(int(z"93")), &
& magic_string = "NUMPY"

contains


!> Generate magic header string for npy format
pure function magic_header(major, minor) result(str)
!> Major version of npy format
integer, intent(in) :: major
!> Minor version of npy format
integer, intent(in) :: minor
!> Magic string for npy format
character(len=8) :: str

str = magic_number // magic_string // char(major) // char(minor)
end function magic_header


!> Generate header for npy format
pure function npy_header(vtype, vshape) result(str)
!> Type of variable
character(len=*), intent(in) :: vtype
!> Shape of variable
integer, intent(in) :: vshape(:)
!> Header string for npy format
character(len=:), allocatable :: str

integer, parameter :: len_v10 = 8 + 2, len_v20 = 8 + 4

str = &
"{'descr': '"//vtype//&
"', 'fortran_order': True, 'shape': "//&
shape_str(vshape)//", }"

if (len(str) + len_v10 >= 65535) then
str = str // &
& repeat(" ", 16 - mod(len(str) + len_v20 + 1, 16)) // nl
str = magic_header(2, 0) // to_bytes_i4(int(len(str))) // str
else
str = str // &
& repeat(" ", 16 - mod(len(str) + len_v10 + 1, 16)) // nl
str = magic_header(1, 0) // to_bytes_i2(int(len(str))) // str
end if
end function npy_header

!> Write integer as byte string in little endian encoding
pure function to_bytes_i4(val) result(str)
!> Integer value to convert to bytes
integer, intent(in) :: val
!> String of bytes
character(len=4), allocatable :: str

str = char(mod(val, 2**8)) // &
& char(mod(val, 2**16) / 2**8) // &
& char(mod(val, 2**32) / 2**16) // &
& char(val / 2**32)
end function to_bytes_i4


!> Write integer as byte string in little endian encoding, 2-byte truncated version
pure function to_bytes_i2(val) result(str)
!> Integer value to convert to bytes
integer, intent(in) :: val
!> String of bytes
character(len=2), allocatable :: str

str = char(mod(val, 2**8)) // &
& char(mod(val, 2**16) / 2**8)
end function to_bytes_i2


!> Print array shape as tuple of int
pure function shape_str(vshape) result(str)
!> Shape of variable
integer, intent(in) :: vshape(:)
!> Shape string for npy format
character(len=:), allocatable :: str

integer :: i

str = "("
do i = 1, size(vshape)
str = str//to_string(vshape(i))//", "
enddo
str = str//")"
end function shape_str


#:for k1, t1 in KINDS_TYPES
#:for rank in RANKS
!> Save ${rank}$-dimensional array in npy format
subroutine save_npy_${t1[0]}$${k1}$_${rank}$(filename, array, iostat)
character(len=*), intent(in) :: filename
${t1}$, intent(in) :: array${ranksuffix(rank)}$
integer, intent(out), optional :: iostat
character(len=*), parameter :: vtype = type_${t1[0]}$${k1}$

integer :: io, stat

open(newunit=io, file=filename, form="unformatted", access="stream", iostat=stat)
if (stat == 0) then
write(io, iostat=stat) npy_header(vtype, shape(array))
end if
if (stat == 0) then
write(io, iostat=stat) array
end if
close(io, iostat=stat)

if (present(iostat)) then
iostat = stat
else if (stat /= 0) then
call error_stop("Failed to write array to file '"//filename//"'")
end if

end subroutine save_npy_${t1[0]}$${k1}$_${rank}$
#:endfor
#:endfor


end module stdlib_io_npy

0 comments on commit 6fa3355

Please sign in to comment.