-
Notifications
You must be signed in to change notification settings - Fork 4
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Dfview #297
base: master
Are you sure you want to change the base?
Dfview #297
Changes from 236 commits
a2d7008
62925bb
0e313dc
e211371
39e4535
329a7cc
d9d8b02
f7ba342
21f0fa9
c9363ef
14fc1f3
2d13342
666073e
73aa50e
689cc3f
8ba818f
abb3337
3180cbd
9b9c420
55989d6
cd69d04
f2136d5
30953e3
0dccc6e
000463d
37972b5
74c1dad
bf210c4
95c1645
5db42d2
664e255
02265fe
f536652
bafe9cf
223dbe9
cc48016
948ce1a
37b8ac2
0369c92
2096828
f213240
b050d74
76b5ff1
fe36b94
daa6012
5c43f38
459b91c
c0ac960
dd0867d
e52d825
463ea70
76d1952
7cfeceb
b1cb082
eaac2b6
a9ce1fb
113a83f
375982c
e9d1053
e1ed80d
f4fe394
a057677
0845a63
4d2886a
db3ec9f
f2efedc
4926330
56bb190
04d810b
f0b7e37
18d49a6
737eeed
732762d
ab6508c
39027f7
358d82b
98a4d7f
e6b1a57
a0e0167
204bd39
c788b96
60f2ba9
bba4829
c341eb2
b23f1d8
63bd5a0
650014e
cb9f2a2
013f401
18ce7ce
8657081
51e2fec
955aede
039d8ee
547bb88
700635f
dec92ca
b631932
4804417
f16cb09
37dac08
8c62e0a
cfcb69b
22504ef
23c373d
98624e6
76d8717
44a9c3d
210f847
f8829ae
a7d6673
3d322c2
294ec3a
e8edd9d
c2ba9ff
1a19815
1fb0362
937368e
cddcf66
534cbd4
e5dc536
9b1a4a9
1967685
6c3270a
6bdb08e
3680436
cf5f5a6
23ad71a
75eefc0
3ddc916
3a6dc51
c02fe32
58159d0
a7b477d
903f3b4
29f736d
7fd9bdc
04df757
e47e15c
a4b14fb
5492b94
25320bd
e289c6b
a59c13a
87df0bc
0875149
c335831
bdf783a
ea20c60
778d56c
611601a
66867b7
fbe396f
c2c7185
78cc222
001134c
eb0bb76
d646ac2
b55775b
0d23098
7774c6f
ae1d621
3d5738e
4685c6b
dc38d28
0df34bc
87353e3
87abe47
a3719ef
ed42f70
2157da2
1abeaa7
e77562e
03208aa
35430f2
de3e7e5
a8af750
d41a24b
c98b87c
a57c413
764650b
4676901
e5d74c6
030d587
55e62eb
7cf7bae
521142e
9373fd2
6f67ac4
703a19a
613532a
b981bb9
e35c1c4
a7ee946
0f319d1
a5ab148
f50ab1f
94a074a
ef563e9
e0caad0
ba06c9f
25a3cc3
2e03557
5f83d7e
d932731
fff9a19
ffbc4f0
7f82d58
474425a
1c59265
5002c65
efcde7c
825aaf1
47e2e7e
071be03
c908397
72cca74
bd9a85f
62d0d69
40e8770
81bbbcd
3507595
099c8f6
3d6966e
735b7a5
85169e6
9667dd7
c333f6d
bb764bd
7803d76
dfb36ab
5c93b43
fe9cee8
c1ad9ba
80c0339
6cb1d3e
e8cf7f2
3153f2b
135260e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -58,10 +58,16 @@ def __init__(self, | |
self.name = name | ||
self._columns = OrderedDict() | ||
self._dataset = dataset | ||
self._h5group = h5group | ||
self._h5group = h5group # the HDF5 group to store all fields | ||
|
||
for subg in h5group.keys(): | ||
self._columns[subg] = dataset.session.get(h5group[subg]) | ||
if subg[0] != '_': # stores metadata, for example filters | ||
self._columns[subg] = dataset.session.get(h5group[subg]) | ||
|
||
if '_filters' not in h5group.keys(): | ||
self._filters_grp = self._h5group.create_group('_filters') | ||
else: | ||
self._filters_grp = h5group['_filters'] | ||
|
||
@property | ||
def columns(self): | ||
|
@@ -101,15 +107,53 @@ def add(self, | |
nfield.data.write(field.data[:]) | ||
self._columns[dname] = nfield | ||
|
||
def _add_view(self, field: fld.Field, filter: np.ndarray = None):
    """
    Register a view of ``field`` in this dataframe, optionally bound to a filter.

    Internal helper used by ``apply_filter``, ``apply_index`` and ``view``.
    The view is a new field object of the same class as ``field``, built on
    ``field._field`` and owned by this dataframe. When ``filter`` is given it
    is stored as a numeric field named after the view inside the ``_filters``
    group, and the view receives a read-only wrapper around it.

    :param field: The field to create a view of.
    :param filter: Optional array of indices to attach to the view.
    :return: The field view stored in this dataframe.
    """
    # Build the view on top of the source field's storage and link the two.
    view = type(field)(field._session, field._field, self, write_enabled=True)
    field.attach(view)
    self._columns[view.name] = view

    if filter is not None:
        session = self._dataset.session

        # int32 by default; int64 once any index reaches 2**31 - 1.
        needs_int64 = len(filter) > 0 and np.max(filter) >= 2**31 - 1
        nformat = 'int64' if needs_int64 else 'int32'

        filter_name = view.name
        already_stored = filter_name in self._filters_grp.keys()
        if not already_stored:
            fld.numeric_field_constructor(session, self._filters_grp, filter_name, nformat)

        filter_field = fld.NumericField(session, self._filters_grp[filter_name], self,
                                        write_enabled=True)
        if already_stored:
            # Reuse the stored filter: convert if the dtype differs, then
            # discard the previous content before writing the new indices.
            if nformat not in filter_field._fieldtype:
                filter_field = filter_field.astype(nformat)
            filter_field.data.clear()
        filter_field.data.write(filter)

        # The view only ever reads the filter.
        view._filter_wrapper = fld.ReadOnlyFieldArray(filter_field, 'values')

    return self._columns[view.name]
|
||
def drop(self,
         name: str):
    """
    Drop a field from this dataframe and, if present, from the HDF5 group.

    A field that is only referenced by this dataframe (for example a view)
    has an entry in the column mapping but no entry in the HDF5 group, so
    the group entry is removed only when it exists.

    :param name: name of field to be dropped
    :raises KeyError: if no column called ``name`` is registered
    """
    # Every field known to the dataframe is registered in the column mapping.
    del self._columns[name]
    # Reference-only fields have no backing entry in the HDF5 group.
    if name in self._h5group:
        del self._h5group[name]
|
||
def create_group(self, | ||
name: str): | ||
|
@@ -317,8 +361,10 @@ def __delitem__(self, name): | |
if not self.__contains__(name=name): | ||
raise ValueError("There is no field named '{}' in this dataframe".format(name)) | ||
else: | ||
del self._h5group[name] | ||
del self._columns[name] | ||
del self._columns[name] # should always be | ||
if name in self._h5group.keys(): # in case of reference only | ||
del self._h5group[name] | ||
|
||
|
||
def delete_field(self, field): | ||
""" | ||
|
@@ -478,13 +524,12 @@ def apply_filter(self, filter_to_apply, ddf=None): | |
:returns: a dataframe containing all the filtered fields; self if ddf is not set | |
""" | ||
filter_to_apply_ = val.validate_filter(filter_to_apply) | ||
|
||
if ddf is not None: | ||
if ddf is not None and ddf is not self: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Review comment: `ddf = self if ddf is None`. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
if not isinstance(ddf, DataFrame): | ||
raise TypeError("The destination object must be an instance of DataFrame.") | ||
filter_to_apply_ = filter_to_apply_.nonzero()[0] | ||
for name, field in self._columns.items(): | ||
newfld = field.create_like(ddf, name) | ||
field.apply_filter(filter_to_apply_, target=newfld) | ||
ddf._add_view(field, filter_to_apply_) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Review comment: check if it is the same dataset. |
||
return ddf | ||
else: | ||
for field in self._columns.values(): | ||
|
@@ -514,15 +559,16 @@ def apply_index(self, index_to_apply, ddf=None): | |
:param ddf: optional- the destination data frame | ||
:returns: a dataframe containing all the re-indexed fields; self if ddf is not set | |
""" | ||
if ddf is not None: | ||
if ddf is not None and ddf is not self: | ||
if not isinstance(ddf, DataFrame): | ||
raise TypeError("The destination object must be an instance of DataFrame.") | ||
for name, field in self._columns.items(): | ||
newfld = field.create_like(ddf, name) | ||
field.apply_index(index_to_apply, target=newfld) | ||
# newfld = field.create_like(ddf, name) | ||
# field.apply_index(index_to_apply, target=newfld) | ||
ddf._add_view(field, index_to_apply) | ||
return ddf | ||
else: | ||
val.validate_all_field_length_in_df(self) | ||
val.validate_all_field_length_in_df(self) | ||
|
||
for field in self._columns.values(): | ||
field.apply_index(index_to_apply, in_place=True) | ||
|
@@ -981,6 +1027,11 @@ def describe(self, include=None, exclude=None, output='terminal'): | |
print('\n') | ||
return result | ||
|
||
def view(self):
    """
    Create a companion dataframe holding a view of every column.

    The new dataframe is created in the same dataset under this dataframe's
    name suffixed with ``'_view'``; each column is added to it through
    ``_add_view`` without a filter.

    :return: The newly created dataframe of views.
    """
    view_name = self.name + '_view'
    view_df = self.dataset.create_dataframe(view_name)
    for column in self.columns.values():
        view_df._add_view(column)
    return view_df
|
||
|
||
class HDF5DataFrameGroupBy(DataFrameGroupBy): | ||
|
@@ -1656,4 +1707,4 @@ def _ordered_merge(left: DataFrame, | |
if right[k].indexed: | ||
ops.ordered_map_valid_indexed_stream(right[k], right_map, dest_f, invalid) | ||
else: | ||
ops.ordered_map_valid_stream(right[k], right_map, dest_f, invalid) | ||
ops.ordered_map_valid_stream(right[k], right_map, dest_f, invalid) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
utils.INT64_INDEX_LENGTH