Skip to content

Commit

Permalink
handle large array when copying
Browse files Browse the repository at this point in the history
  • Loading branch information
Alexander-Barth committed Sep 11, 2023
1 parent 08a4888 commit 0be5809
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 2 deletions.
1 change: 1 addition & 0 deletions docs/src/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ subdata = NCDataset("/tmp/test.nc")["temperature"][10:30,30:5:end]
```

This might be useful in an interactive session. However, the file `test.nc` is not closed, which can be a problem if you open many files. On Linux the number of open files is often limited to 1024 (soft limit). If you write to a file, you should also always close the file to make sure that the data is properly written to disk.
(Open files will eventually be closed when the dataset variable is finalized by Julia's garbage collector.)

An alternative way to ensure the file has been closed is to use a `do` block: the file will be closed automatically when leaving the block.

Expand Down
1 change: 1 addition & 0 deletions src/NCDatasets.jl
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ include("defer.jl")
include("multifile.jl")
include("ncgen.jl")
include("select.jl")
include("chunks.jl")
include("precompile.jl")

export CatArrays
Expand Down
32 changes: 30 additions & 2 deletions src/dataset.jl
Original file line number Diff line number Diff line change
Expand Up @@ -453,7 +453,10 @@ It is assumed that all the variables of the output file can be loaded in memory.
function Base.write(dest::NCDataset, src::AbstractDataset;
include = keys(src),
exclude = String[],
idimensions = Dict())
idimensions = Dict(),
chunk_max_length = 10_000_000,
_ignore_checksum = false,
)

torange(indices::Colon) = indices
function torange(indices)
Expand Down Expand Up @@ -500,9 +503,34 @@ function Base.write(dest::NCDataset, src::AbstractDataset;
# indices for subset
index = ntuple(i -> torange(get(idimensions,dimension_names[i],:)),length(dimension_names))

var_slice = view(var,index...)


destvar = defVar(dest, varname, eltype(var), dimension_names; attrib = attribs(cfvar))

if hasmethod(chunking,Tuple{typeof(var_slice)})
storage,chunksizes = chunking(var_slice)
@debug "chunking " storage chunksizes
chunking(destvar,storage,chunksizes)
end

if hasmethod(deflate,Tuple{typeof(var_slice)})
isshuffled,isdeflated,deflate_level = deflate(var_slice)
@debug "compression" isshuffled isdeflated deflate_level
deflate(destvar,isshuffled,isdeflated,deflate_level)
end

if hasmethod(checksum,Tuple{typeof(var_slice)}) && !_ignore_checksum
checksummethod = checksum(var_slice)
@debug "check-sum" checksummethod
checksum(destvar,checksummethod)
end

# copy data
destvar.var[:] = cfvar.var[index...]
for ci in each_chunk_index(var_slice,chunk_max_length)
@debug "indices" ci
destvar.var[ci] = var_slice[ci]
end
end

# loop over all global attributes
Expand Down
8 changes: 8 additions & 0 deletions src/subvariable.jl
Original file line number Diff line number Diff line change
Expand Up @@ -174,3 +174,11 @@ function dataset(v::SubVariable)
indices = (;((Symbol(d),i) for (d,i) in zip(dimnames(v),v.indices))...)
return SubDataset(dataset(v.parent),indices)
end

"""
    storage, chunksizes = chunking(v::SubVariable)

Return the storage setting and chunk sizes reported by `chunking` for the
parent variable of `v`, with every chunk size capped element-wise at the
corresponding extent of `v` (a chunk is never reported larger than the
sub-variable itself).
"""
function chunking(v::SubVariable)
    parent_storage, parent_chunksizes = chunking(v.parent)
    # extent of the sub-variable along each of its dimensions
    extent = collect(size(v))
    capped_chunksizes = min.(parent_chunksizes, extent)
    return parent_storage, capped_chunksizes
end

"""
    deflate(v::SubVariable)

Return the result of `deflate` for the parent variable of `v`; a sub-variable
reports the same settings as the variable it is a view of.
"""
function deflate(v::SubVariable)
    return deflate(v.parent)
end
"""
    checksum(v::SubVariable)

Return the result of `checksum` for the parent variable of `v`; a sub-variable
reports the same setting as the variable it is a view of.
"""
function checksum(v::SubVariable)
    return checksum(v.parent)
end

0 comments on commit 0be5809

Please sign in to comment.