Skip to content

Commit

Permalink
Merge pull request #19 from psrenergy/vj/merge-quivers
Browse files Browse the repository at this point in the history
merge quivers
  • Loading branch information
guilhermebodin authored Sep 12, 2024
2 parents 978619c + 00ef500 commit 250aee1
Show file tree
Hide file tree
Showing 7 changed files with 839 additions and 117 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "Quiver"
uuid = "cdbb3f72-2527-4dbd-9d0e-93533a5519ac"
authors = ["raphasampaio", "guilhermebodin"]
version = "0.1.2"
version = "0.1.3"

[deps]
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
Expand Down
2 changes: 2 additions & 0 deletions src/Quiver.jl
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,6 @@ include("reader.jl")
include("csv.jl")
include("binary.jl")

include("merge.jl")

end
69 changes: 65 additions & 4 deletions src/csv.jl
Original file line number Diff line number Diff line change
Expand Up @@ -116,13 +116,74 @@ function _quiver_next_dimension!(reader::Quiver.Reader{csv})
for (i, ts) in enumerate(reader.metadata.labels)
reader.all_labels_data_cache[i] = row[Symbol(ts)]
end
next = iterate(reader.reader.iterator, state)
reader.reader.next = next
reader.reader.next = iterate(reader.reader.iterator, state)
return nothing
end

function _quiver_goto!(reader::Quiver.Reader{csv}, dims...)
error("_quiver_goto! not implemented for csv")
"""
    _calculate_order_in_file(metadata::Quiver.Metadata, dims...)

Return the zero-based linear order of the dimension tuple `dims`.

Every entry except the last contributes `dims[k] - 1` multiplied by the value that
`performant_product_from_index_i_to_j` returns for `metadata.dimension_size` between
index `k + 1` and `metadata.number_of_dimensions`. The last entry contributes
`dims[end] - 1`.
"""
function _calculate_order_in_file(metadata::Quiver.Metadata, dims...)
    n_dims = metadata.number_of_dimensions
    offset = 0
    for k in 1:(n_dims - 1)
        zero_based_index = dims[k] - 1
        stride = performant_product_from_index_i_to_j(
            metadata.dimension_size,
            k + 1,
            n_dims,
        )
        offset += zero_based_index * stride
    end
    # The last dimension is the fastest-varying one, so it is added without a stride.
    return offset + (dims[end] - 1)
end

"""
    _current_dimension_in_iterator(reader::Quiver.Reader{csv})

Return a `Vector{Int}` with the dimension values of the row currently held in
`reader.reader.next`. The iterator itself is not advanced.

Raises an error ("No more data to read") when `reader.reader.next` is `nothing`.
"""
function _current_dimension_in_iterator(reader::Quiver.Reader{csv})
    pending = reader.reader.next
    pending === nothing && error("No more data to read")

    # `pending` is a (row, state) pair; only the row is needed here.
    row, _ = pending
    current = zeros(Int, reader.metadata.number_of_dimensions)
    for (k, name) in enumerate(reader.metadata.dimensions)
        current[k] = row[name]
    end
    return current
end

"""
    _quiver_goto!(reader::Quiver.Reader{csv})

Move the CSV iterator based on how the pending row (`reader.reader.next`) compares with
`reader.last_dimension_read`, using the linear order given by `_calculate_order_in_file`.

- Pending row after the requested dimension: raise an error.
- Pending row equal to the requested dimension: copy the row's dimensions into
  `reader.last_dimension_read`, fill `reader.all_labels_data_cache` (row values when every
  dimension equals 1, `NaN` otherwise) and call `_quiver_next_dimension!` once.
- Pending row before the requested dimension: call `_quiver_next_dimension!` repeatedly
  until the pending row lies after the requested dimension.

Raises an error ("No more data to read") when the iterator is already exhausted.
"""
function _quiver_goto!(reader::Quiver.Reader{csv})
    reader.reader.next === nothing && error("No more data to read")

    pending_order = _calculate_order_in_file(
        reader.metadata,
        _current_dimension_in_iterator(reader)...,
    )
    target_order = _calculate_order_in_file(
        reader.metadata,
        reader.last_dimension_read...,
    )

    if pending_order > target_order
        error("Cannot read a dimension that is posterior to the current dimension")
    end

    if pending_order == target_order
        row, _ = reader.reader.next
        at_first_index = true
        for (k, name) in enumerate(reader.metadata.dimensions)
            reader.last_dimension_read[k] = row[name]
            at_first_index = at_first_index && row[name] == 1
        end

        for (k, label) in enumerate(reader.metadata.labels)
            reader.all_labels_data_cache[k] = at_first_index ? row[Symbol(label)] : NaN
        end

        _quiver_next_dimension!(reader)
        return nothing
    end

    # The pending row precedes the requested dimension: keep consuming rows.
    # NOTE(review): if this loop consumes the final row of the file, the call to
    # `_current_dimension_in_iterator` below raises "No more data to read" - confirm
    # that this is the intended behaviour when the requested dimension is the last row.
    while pending_order <= target_order
        _quiver_next_dimension!(reader)
        pending_order = _calculate_order_in_file(
            reader.metadata,
            _current_dimension_in_iterator(reader)...,
        )
    end
    return nothing
end

Expand Down
83 changes: 83 additions & 0 deletions src/merge.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
"""
    merge(output_filename::String, filenames::Vector{String}, impl::Type{<:Implementation})

Merge the Quiver files listed in `filenames` into one file named `output_filename`,
reading and writing with the implementation `impl`.

All inputs must share the same dimensions, dimension sizes, time dimension, initial date
and unit as the first file, and a label may not appear in more than one file. The output
labels are the input labels concatenated in the order of `filenames`.

Records are visited with the last dimension varying fastest. A record whose merged values
are all `NaN` is not written.

# Throws
- `ArgumentError` if `filenames` is empty.
- `ArgumentError` listing every incompatibility found between the input files.

Readers and the writer are closed even when an error is raised.
"""
function merge(
    output_filename::String,
    filenames::Vector{String},
    impl::Type{<:Implementation},
)
    if isempty(filenames)
        throw(ArgumentError("Merge requires at least one input file."))
    end

    readers = [Quiver.Reader{impl}(filename) for filename in filenames]
    try
        reference = first(readers)
        metadata = reference.metadata
        labels = String[]

        # Collect every problem before failing so the user sees them all at once.
        errors = String[]
        report!(text) = push!(errors, "[Error $(length(errors) + 1)] $text\n\n")

        for reader in readers
            if metadata.dimensions != reader.metadata.dimensions
                report!(
                    "Dimensions are different. Dimensions in file $(reference.filename) is $(metadata.dimensions) and in file $(reader.filename) is $(reader.metadata.dimensions).",
                )
            end
            if metadata.dimension_size != reader.metadata.dimension_size
                report!(
                    "Dimension sizes are different. Dimension size in file $(reference.filename) is $(metadata.dimension_size) and in file $(reader.filename) is $(reader.metadata.dimension_size).",
                )
            end
            if metadata.time_dimension != reader.metadata.time_dimension
                report!(
                    "Time dimensions are different. Time dimension in file $(reference.filename) is $(metadata.time_dimension) and in file $(reader.filename) is $(reader.metadata.time_dimension).",
                )
            end
            if metadata.initial_date != reader.metadata.initial_date
                report!(
                    "Initial dates are different. Initial date in file $(reference.filename) is $(metadata.initial_date) and in file $(reader.filename) is $(reader.metadata.initial_date).",
                )
            end
            if metadata.unit != reader.metadata.unit
                report!(
                    "Units are different. Unit in file $(reference.filename) is $(metadata.unit) and in file $(reader.filename) is $(reader.metadata.unit).",
                )
            end
            current_labels = reader.metadata.labels
            for label in current_labels
                if label in labels
                    # Report the offending file name (the original printed the dimensions).
                    report!(
                        "Label $(label) in file $(reader.filename) is already in the merged labels.",
                    )
                end
            end
            append!(labels, current_labels)
        end

        if !isempty(errors)
            throw(ArgumentError("Merge has $(length(errors)) errors.\n\n$(join(errors))"))
        end

        writer = Quiver.Writer{impl}(
            output_filename;
            labels = labels,
            dimensions = string.(metadata.dimensions),
            time_dimension = string(metadata.time_dimension),
            dimension_size = metadata.dimension_size,
            initial_date = metadata.initial_date,
            unit = metadata.unit,
        )

        try
            # Slice of `data` owned by each reader, computed once instead of per record.
            num_labels = [length(reader.metadata.labels) for reader in readers]
            last_idx = cumsum(num_labels)
            first_idx = last_idx .- num_labels .+ 1
            data = zeros(length(labels))

            # `Iterators.product` varies its first range fastest, so the sizes are reversed
            # to make the last dimension the fastest-varying one.
            ranges = [1:n for n in reverse(metadata.dimension_size)]
            for dims in Iterators.product(ranges...)
                dim_kwargs = OrderedDict(metadata.dimensions .=> reverse(dims))
                for (i, reader) in enumerate(readers)
                    Quiver.goto!(reader; dim_kwargs...)
                    data[first_idx[i]:last_idx[i]] = reader.data
                end
                # Skip records for which no input file has data.
                all(isnan, data) && continue
                Quiver.write!(writer, data; dim_kwargs...)
            end
        finally
            Quiver.close!(writer)
        end
    finally
        for reader in readers
            Quiver.close!(reader)
        end
    end

    return nothing
end
134 changes: 134 additions & 0 deletions test/test_convert.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
module TestConvert

using Dates
using Quiver
using Test

# Write a binary Quiver file whose number of blocks changes per stage, convert it to CSV,
# then read the CSV sequentially and check that every record comes back in writing order.
function binary_to_csv()
    filename = joinpath(@__DIR__, "test_binary_to_csv")

    initial_date = DateTime(2006, 1, 1)
    num_stages = 10
    num_scenarios = 12
    num_time_series = 3
    dates = collect(initial_date:Dates.Month(1):initial_date + Dates.Month(num_stages - 1))
    # One block per hour of the month.
    num_blocks_per_stage = Int32.(Dates.daysinmonth.(dates) .* 24)

    writer = Quiver.Writer{Quiver.binary}(
        filename;
        dimensions = ["stage", "scenario", "block"],
        labels = ["agent_$i" for i in 1:num_time_series],
        time_dimension = "stage",
        dimension_size = [num_stages, num_scenarios, maximum(num_blocks_per_stage)],
        initial_date = initial_date,
    )

    # Each record stores its own coordinates so it can be recognised after conversion.
    for stage in 1:num_stages,
        scenario in 1:num_scenarios,
        block in 1:num_blocks_per_stage[stage]

        Quiver.write!(writer, [stage, scenario, block]; stage, scenario, block)
    end

    Quiver.close!(writer)

    Quiver.convert(filename, Quiver.binary, Quiver.csv)

    reader = Quiver.Reader{Quiver.csv}(filename)
    for stage in 1:num_stages,
        scenario in 1:num_scenarios,
        block in 1:num_blocks_per_stage[stage]

        Quiver.next_dimension!(reader)
        @test reader.data == [stage, scenario, block]
    end

    Quiver.close!(reader)

    # Remove the files produced by the test.
    rm("$filename.$(Quiver.file_extension(Quiver.binary))")
    rm("$filename.$(Quiver.file_extension(Quiver.csv))")
    rm("$filename.toml")
end

# Write a CSV Quiver file whose number of blocks changes per stage, convert it to binary,
# then use `goto!` on the binary file to check every record that was written.
function csv_to_binary()
    filename = joinpath(@__DIR__, "test_csv_to_binary")

    initial_date = DateTime(2006, 1, 1)
    num_stages = 10
    num_scenarios = 12
    num_time_series = 3
    dates = collect(initial_date:Dates.Month(1):initial_date + Dates.Month(num_stages - 1))
    # One block per hour of the month.
    num_blocks_per_stage = Int32.(Dates.daysinmonth.(dates) .* 24)

    writer = Quiver.Writer{Quiver.csv}(
        filename;
        dimensions = ["stage", "scenario", "block"],
        labels = ["agent_$i" for i in 1:num_time_series],
        time_dimension = "stage",
        dimension_size = [num_stages, num_scenarios, maximum(num_blocks_per_stage)],
        initial_date = initial_date,
    )

    # Each record stores its own coordinates so it can be recognised after conversion.
    for stage in 1:num_stages,
        scenario in 1:num_scenarios,
        block in 1:num_blocks_per_stage[stage]

        Quiver.write!(writer, [stage, scenario, block]; stage, scenario, block)
    end

    Quiver.close!(writer)

    Quiver.convert(filename, Quiver.csv, Quiver.binary)

    reader = Quiver.Reader{Quiver.binary}(filename)
    for stage in 1:num_stages,
        scenario in 1:num_scenarios,
        block in 1:num_blocks_per_stage[stage]

        Quiver.goto!(reader; stage, scenario, block)
        @test reader.data == [stage, scenario, block]
    end

    Quiver.close!(reader)

    # Remove the files produced by the test.
    rm("$filename.$(Quiver.file_extension(Quiver.csv))")
    rm("$filename.$(Quiver.file_extension(Quiver.binary))")
    rm("$filename.toml")
end

# Entry point picked up by `runtests`: exercises both conversion directions in turn.
function test_convert()
    binary_to_csv()
    return csv_to_binary()
end

# Run every function of this module whose name starts with "test_", each inside its own
# `@testset`. The garbage collector is triggered twice before the tests start.
function runtests()
    Base.GC.gc()
    Base.GC.gc()
    for name in names(@__MODULE__; all = true)
        startswith("$name", "test_") || continue
        @testset "$(name)" begin
            getfield(@__MODULE__, name)()
        end
    end
end

# Run all `test_*` functions of this module when the file is loaded.
TestConvert.runtests()

end
Loading

2 comments on commit 250aee1

@guilhermebodin
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/115076

Tip: Release Notes

Did you know you can add release notes too? Just add Markdown-formatted text underneath the comment after the text
"Release notes:" and it will be added to the registry PR; if TagBot is installed, it will also be added to the
release that TagBot creates. For example:

@JuliaRegistrator register

Release notes:

## Breaking changes

- blah

To add them here just re-invoke and the PR will be updated.

Tagging

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the GitHub interface, or via:

git tag -a v0.1.3 -m "<description of version>" 250aee18e61e2966e74a2e5153bc7c04744ba023
git push origin v0.1.3

Please sign in to comment.