-
Notifications
You must be signed in to change notification settings - Fork 174
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add routines for saving arrays in npy format
- Loading branch information
Showing
3 changed files
with
225 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,219 @@ | ||
! SPDX-Identifer: MIT | ||
|
||
#:include "common.fypp" | ||
#:set RANKS = range(1, MAXRANK + 1) | ||
#:set KINDS_TYPES = REAL_KINDS_TYPES + INT_KINDS_TYPES + CMPLX_KINDS_TYPES | ||
|
||
!> Description of the npy format taken from | ||
!> https://numpy.org/doc/stable/reference/generated/numpy.lib.format.html | ||
!> | ||
!>## Format Version 1.0 | ||
!> | ||
!> The first 6 bytes are a magic string: exactly \x93NUMPY. | ||
!> | ||
!> The next 1 byte is an unsigned byte: | ||
!> the major version number of the file format, e.g. \x01. | ||
!> | ||
!> The next 1 byte is an unsigned byte: | ||
!> the minor version number of the file format, e.g. \x00. | ||
!> Note: the version of the file format is not tied to the version of the numpy package. | ||
!> | ||
!> The next 2 bytes form a little-endian unsigned short int: | ||
!> the length of the header data HEADER_LEN. | ||
!> | ||
!> The next HEADER_LEN bytes form the header data describing the array’s format. | ||
!> It is an ASCII string which contains a Python literal expression of a dictionary. | ||
!> It is terminated by a newline (\n) and padded with spaces (\x20) to make the total | ||
!> of len(magic string) + 2 + len(length) + HEADER_LEN be evenly divisible by 64 for | ||
!> alignment purposes. | ||
!> | ||
!> The dictionary contains three keys: | ||
!> | ||
!> - “descr”: dtype.descr | ||
!> An object that can be passed as an argument to the numpy.dtype constructor | ||
!> to create the array’s dtype. | ||
!> | ||
!> - “fortran_order”: bool | ||
!> Whether the array data is Fortran-contiguous or not. Since Fortran-contiguous | ||
!> arrays are a common form of non-C-contiguity, we allow them to be written directly | ||
!> to disk for efficiency. | ||
!> | ||
!> - “shape”: tuple of int | ||
!> The shape of the array. | ||
!> | ||
!> For repeatability and readability, the dictionary keys are sorted in alphabetic order. | ||
!> This is for convenience only. A writer SHOULD implement this if possible. A reader MUST | ||
!> NOT depend on this. | ||
!> | ||
!> Following the header comes the array data. If the dtype contains Python objects | ||
!> (i.e. dtype.hasobject is True), then the data is a Python pickle of the array. | ||
!> Otherwise the data is the contiguous (either C- or Fortran-, depending on fortran_order) | ||
!> bytes of the array. Consumers can figure out the number of bytes by multiplying the | ||
!> number of elements given by the shape (noting that shape=() means there is 1 element) | ||
!> by dtype.itemsize. | ||
!> | ||
!>## Format Version 2.0 | ||
!> | ||
!> The version 1.0 format only allowed the array header to have a total size of 65535 bytes. | ||
!> This can be exceeded by structured arrays with a large number of columns. | ||
!> The version 2.0 format extends the header size to 4 GiB. numpy.save will automatically | ||
!> save in 2.0 format if the data requires it, else it will always use the more compatible | ||
!> 1.0 format. | ||
!> | ||
!> The description of the fourth element of the header therefore has become: | ||
!> “The next 4 bytes form a little-endian unsigned int: the length of the header data | ||
!> HEADER_LEN.” | ||
!> | ||
!>## Format Version 3.0 | ||
!> | ||
!> This version replaces the ASCII string (which in practice was latin1) with a | ||
!> utf8-encoded string, so supports structured types with any unicode field names. | ||
module stdlib_io_npy | ||
use stdlib_error, only : error_stop | ||
use stdlib_kinds, only : int8, int16, int32, int64, sp, dp, xdp, qp | ||
use stdlib_strings, only : to_string | ||
implicit none | ||
private | ||
|
||
public :: save_npy | ||
|
||
|
||
!> Save multidimensional array in npy format | ||
interface save_npy | ||
#:for k1, t1 in KINDS_TYPES | ||
#:for rank in RANKS | ||
module procedure save_npy_${t1[0]}$${k1}$_${rank}$ | ||
#:endfor | ||
#:endfor | ||
end interface save_npy | ||
|
||
|
||
character(len=*), parameter :: nl = char(10) | ||
|
||
character(len=*), parameter :: & | ||
type_iint8 = "<i1", type_iint16 = "<i2", type_iint32 = "<i4", type_iint64 = "<i8", & | ||
type_rsp = "<f4", type_rdp = "<f8", type_rxdp = "<f10", type_rqp = "<f16", & | ||
type_csp = "<c8", type_cdp = "<c16", type_cxdp = "<c20", type_cqp = "<c32" | ||
|
||
character(len=*), parameter :: & | ||
& magic_number = char(int(z"93")), & | ||
& magic_string = "NUMPY" | ||
|
||
contains | ||
|
||
|
||
!> Generate magic header string for npy format | ||
pure function magic_header(major, minor) result(str) | ||
!> Major version of npy format | ||
integer, intent(in) :: major | ||
!> Minor version of npy format | ||
integer, intent(in) :: minor | ||
!> Magic string for npy format | ||
character(len=8) :: str | ||
|
||
str = magic_number // magic_string // char(major) // char(minor) | ||
end function magic_header | ||
|
||
|
||
!> Generate header for npy format | ||
pure function npy_header(vtype, vshape) result(str) | ||
!> Type of variable | ||
character(len=*), intent(in) :: vtype | ||
!> Shape of variable | ||
integer, intent(in) :: vshape(:) | ||
!> Header string for npy format | ||
character(len=:), allocatable :: str | ||
|
||
integer, parameter :: len_v10 = 8 + 2, len_v20 = 8 + 4 | ||
|
||
str = & | ||
"{'descr': '"//vtype//& | ||
"', 'fortran_order': True, 'shape': "//& | ||
shape_str(vshape)//", }" | ||
|
||
if (len(str) + len_v10 >= 65535) then | ||
str = str // & | ||
& repeat(" ", 16 - mod(len(str) + len_v20 + 1, 16)) // nl | ||
str = magic_header(2, 0) // to_bytes_i4(int(len(str))) // str | ||
else | ||
str = str // & | ||
& repeat(" ", 16 - mod(len(str) + len_v10 + 1, 16)) // nl | ||
str = magic_header(1, 0) // to_bytes_i2(int(len(str))) // str | ||
end if | ||
end function npy_header | ||
|
||
!> Write integer as byte string in little endian encoding | ||
pure function to_bytes_i4(val) result(str) | ||
!> Integer value to convert to bytes | ||
integer, intent(in) :: val | ||
!> String of bytes | ||
character(len=4), allocatable :: str | ||
|
||
str = char(mod(val, 2**8)) // & | ||
& char(mod(val, 2**16) / 2**8) // & | ||
& char(mod(val, 2**32) / 2**16) // & | ||
& char(val / 2**32) | ||
end function to_bytes_i4 | ||
|
||
|
||
!> Write integer as byte string in little endian encoding, 2-byte truncated version | ||
pure function to_bytes_i2(val) result(str) | ||
!> Integer value to convert to bytes | ||
integer, intent(in) :: val | ||
!> String of bytes | ||
character(len=2), allocatable :: str | ||
|
||
str = char(mod(val, 2**8)) // & | ||
& char(mod(val, 2**16) / 2**8) | ||
end function to_bytes_i2 | ||
|
||
|
||
!> Print array shape as tuple of int | ||
pure function shape_str(vshape) result(str) | ||
!> Shape of variable | ||
integer, intent(in) :: vshape(:) | ||
!> Shape string for npy format | ||
character(len=:), allocatable :: str | ||
|
||
integer :: i | ||
|
||
str = "(" | ||
do i = 1, size(vshape) | ||
str = str//to_string(vshape(i))//", " | ||
enddo | ||
str = str//")" | ||
end function shape_str | ||
|
||
|
||
#:for k1, t1 in KINDS_TYPES | ||
#:for rank in RANKS | ||
!> Save ${rank}$-dimensional array in npy format | ||
subroutine save_npy_${t1[0]}$${k1}$_${rank}$(filename, array, iostat) | ||
character(len=*), intent(in) :: filename | ||
${t1}$, intent(in) :: array${ranksuffix(rank)}$ | ||
integer, intent(out), optional :: iostat | ||
character(len=*), parameter :: vtype = type_${t1[0]}$${k1}$ | ||
|
||
integer :: io, stat | ||
|
||
open(newunit=io, file=filename, form="unformatted", access="stream", iostat=stat) | ||
if (stat == 0) then | ||
write(io, iostat=stat) npy_header(vtype, shape(array)) | ||
end if | ||
if (stat == 0) then | ||
write(io, iostat=stat) array | ||
end if | ||
close(io, iostat=stat) | ||
|
||
if (present(iostat)) then | ||
iostat = stat | ||
else if (stat /= 0) then | ||
call error_stop("Failed to write array to file '"//filename//"'") | ||
end if | ||
|
||
end subroutine save_npy_${t1[0]}$${k1}$_${rank}$ | ||
#:endfor | ||
#:endfor | ||
|
||
|
||
end module stdlib_io_npy |