Python decompression relative performance?

You can use Python-blosc

It is very fast and for small arrays (<2GB) also quite easy to use. On easily compressable data like your example, it is often faster to compress the data for IO operations. (SATA-SSD: about 500 MB/s, PCIe- SSD: up to 3500MB/s) In the decompression step the array allocation is the most costly part. If your images are of similar shape you can avoid repeated memory allocation.

Example

A contigous array is assumed for the following example.

import blosc
import pickle

def compress(arr,Path):
    #c = blosc.compress_ptr(arr.__array_interface__['data'][0], arr.size, arr.dtype.itemsize, clevel=3,cname="lz4",shuffle=blosc.SHUFFLE)
    c = blosc.compress_ptr(arr.__array_interface__['data'][0], arr.size, arr.dtype.itemsize, clevel=3,cname="zstd",shuffle=blosc.SHUFFLE)
    f=open(Path,"wb")
    pickle.dump((arr.shape, arr.dtype),f)
    f.write(c)
    f.close()
    return c,arr.shape, arr.dtype

def decompress(Path):
    f=open(Path,"rb")
    shape,dtype=pickle.load(f)
    c=f.read()
    #array allocation takes most of the time
    arr=np.empty(shape,dtype)
    blosc.decompress_ptr(c, arr.__array_interface__['data'][0])
    return arr

#Pass a preallocated array if you have many similar images
def decompress_pre(Path,arr):
    f=open(Path,"rb")
    shape,dtype=pickle.load(f)
    c=f.read()
    #array allocation takes most of the time
    blosc.decompress_ptr(c, arr.__array_interface__['data'][0])
    return arr

Benchmarks

#blosc.SHUFFLE, cname="zstd" -> 4728KB,  
%timeit compress(arr,"Test.dat")
1.03 s ± 12.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
#611 MB/s
%timeit decompress("Test.dat")
146 ms ± 481 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
#4310 MB/s
%timeit decompress_pre("Test.dat",arr)
50.9 ms ± 438 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
#12362 MB/s

#blosc.SHUFFLE, cname="lz4" -> 9118KB, 
%timeit compress(arr,"Test.dat")
32.1 ms ± 437 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
#19602 MB/s
%timeit decompress("Test.dat")
146 ms ± 332 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
#4310 MB/s
%timeit decompress_pre("Test.dat",arr)
53.6 ms ± 82.9 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
#11740 MB/s

Edit

This version is more for general use. It does handle f-contiguous, c-contiguous and non-contiguous arrays and arrays >2GB. Also have a look at bloscpack.

import blosc
import pickle

def compress(file, arr,clevel=3,cname="lz4",shuffle=1):
    """
    file           path to file
    arr            numpy nd-array
    clevel         0..9
    cname          blosclz,lz4,lz4hc,snappy,zlib
    shuffle        0-> no shuffle, 1->shuffle,2->bitshuffle
    """
    max_blk_size=100_000_000 #100 MB 

    shape=arr.shape
    #dtype np.object is not implemented
    if arr.dtype==np.object:
        raise(TypeError("dtype np.object is not implemented"))

    #Handling of fortran ordered arrays (avoid copy)
    is_f_contiguous=False
    if arr.flags['F_CONTIGUOUS']==True:
        is_f_contiguous=True
        arr=arr.T.reshape(-1)
    else:
        arr=np.ascontiguousarray(arr.reshape(-1))

    #Writing
    max_num=max_blk_size//arr.dtype.itemsize
    num_chunks=arr.size//max_num

    if arr.size%max_num!=0:
        num_chunks+=1

    f=open(file,"wb")
    pickle.dump((shape,arr.size,arr.dtype,is_f_contiguous,num_chunks,max_num),f)
    size=np.empty(1,np.uint32)
    num_write=max_num
    for i in range(num_chunks):
        if max_num*(i+1)>arr.size:
            num_write=arr.size-max_num*i
        c = blosc.compress_ptr(arr[max_num*i:].__array_interface__['data'][0], num_write, 
                               arr.dtype.itemsize, clevel=clevel,cname=cname,shuffle=shuffle)
        size[0]=len(c)
        size.tofile(f)
        f.write(c)
    f.close()

def decompress(file,prealloc_arr=None):
    f=open(file,"rb")
    shape,arr_size,dtype,is_f_contiguous,num_chunks,max_num=pickle.load(f)

    if prealloc_arr is None:
        if prealloc_arr.flags['F_CONTIGUOUS']==True
            prealloc_arr=prealloc_arr.T
        if prealloc_arr.flags['C_CONTIGUOUS']!=True
            raise(TypeError("Contiguous array is needed"))
        arr=np.empty(arr_size,dtype)
    else:
        arr=np.frombuffer(prealloc_arr.data, dtype=dtype, count=arr_size)

    for i in range(num_chunks):
        size=np.fromfile(f,np.uint32,count=1)
        c=f.read(size[0])
        blosc.decompress_ptr(c, arr[max_num*i:].__array_interface__['data'][0])
    f.close()

    #reshape
    if is_f_contiguous:
        arr=arr.reshape(shape[::-1]).T
    else:
        arr=arr.reshape(shape)
    return arr

Leave a Comment