dummy_metaset.py
import pandas as pd
import numpy as np
# Define the number of datasets and points per dataset
num_datasets = 100
points_per_dataset = 5000
# Generate data
data = {
    'dataset_name': [],
    'point_id': [],
    'x': [],
    'y': [],
    'z': [],
    'w': [],
    'additional_info': []
}

# Populate the data dictionary: one row per point, per dataset
for dataset_id in range(num_datasets):
    dataset_name = f"Dataset_{dataset_id}"
    for point_id in range(points_per_dataset):
        data['dataset_name'].append(dataset_name)
        data['point_id'].append(point_id)
        data['x'].append(np.random.rand())
        data['y'].append(np.random.rand())
        data['z'].append(np.random.rand())
        data['w'].append(np.random.rand())
        data['additional_info'].append(f"Metadata for point {point_id} in {dataset_name}")

# Create a DataFrame (100 datasets x 5,000 points = 500,000 rows)
df = pd.DataFrame(data)

# Save the DataFrame to a Parquet file
# (pandas' to_parquet requires a Parquet engine such as pyarrow or fastparquet)
df.to_parquet('test_metaset.parquet')
print("Test MetaSet Parquet file generated successfully.")
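# --- Optional sanity check (a minimal sketch, not part of the original script) ---
# Reading the file back confirms it was written and has the expected shape.
# Assumes the same Parquet engine (e.g. pyarrow) is installed; the variable
# name df_check is introduced here purely for illustration.
df_check = pd.read_parquet('test_metaset.parquet')
assert len(df_check) == num_datasets * points_per_dataset  # 100 * 5000 = 500,000 rows
print(df_check.head())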