Skip to content

Commit

Permalink
unified version number for package and primitives
Browse files Browse the repository at this point in the history
  • Loading branch information
bjschoenfeld committed Nov 24, 2020
1 parent efe5586 commit 447dd8a
Show file tree
Hide file tree
Showing 6 changed files with 25 additions and 97 deletions.
5 changes: 1 addition & 4 deletions byudml/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,4 @@
__version__ = '0.7.4'
__metafeature_version__ = '0.4.6'
__version__ = '0.7.5'
__metafeature_path__ = 'd3m.primitives.metalearning.metafeature_extractor.BYU'
__imputer_version__ = '0.2.2'
__imputer_path__ = 'd3m.primitives.data_preprocessing.random_sampling_imputer.BYU'
__profiler_version__ = '0.0.4'
__profiler_path__ = 'd3m.primitives.schema_discovery.profiler.BYU'
15 changes: 7 additions & 8 deletions byudml/imputer/random_sampling_imputer.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,7 @@
from d3m.primitive_interfaces.base import CallResult
from d3m.primitive_interfaces.unsupervised_learning import UnsupervisedLearnerPrimitiveBase

from byudml import __imputer_path__, __imputer_version__
from byudml import __version__ as __package_version__
from byudml import __imputer_path__, __version__


Inputs = container.pandas.DataFrame
Expand Down Expand Up @@ -43,33 +42,33 @@ class RandomSamplingImputer(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Pa

metadata = metadata_base.PrimitiveMetadata({
'id': 'ebfeb6f0-e366-4082-b1a7-602fd50acc96',
'version': __imputer_version__,
'version': __version__,
'name': 'Random Sampling Imputer',
'source': {
'name': 'byu-dml',
'contact': 'mailto:[email protected]',
'uris': [
'https://github.com/byu-dml/d3m-primitives'
'https://github.com/byu-dml/d3m-primitives',
]
},
'installation': [
{
'type': metadata_base.PrimitiveInstallationType.PIP,
'package': 'byudml',
'version': __package_version__
'version': __version__,
}
],
'location_uris': [
'https://github.com/byu-dml/d3m-primitives/blob/master/byudml/imputer/random_sampling_imputer.py'
'https://github.com/byu-dml/d3m-primitives/blob/master/byudml/imputer/random_sampling_imputer.py',
],
'python_path': __imputer_path__,
'primitive_family': metadata_base.PrimitiveFamily.DATA_PREPROCESSING,
'algorithm_types': [
metadata_base.PrimitiveAlgorithmType.IMPUTATION
metadata_base.PrimitiveAlgorithmType.IMPUTATION,
],
'effects': [
# NOTE: NO_MISSING_VALUES does not hold if empty columns are just ignored
metadata_base.PrimitiveEffect.NO_MISSING_VALUES
metadata_base.PrimitiveEffect.NO_MISSING_VALUES,
]
})

Expand Down
9 changes: 4 additions & 5 deletions byudml/metafeature_extraction/metafeature_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,7 @@
from metalearn.metafeatures.metafeatures import Metafeatures
import metalearn.metafeatures.constants as mf_consts

from byudml import __version__ as __package_version__
from byudml import __metafeature_path__, __metafeature_version__
from byudml import __metafeature_path__, __version__


Inputs = DataFrame
Expand Down Expand Up @@ -67,7 +66,7 @@ class MetafeatureExtractor(FeaturizationTransformerPrimitiveBase[Inputs, Outputs
# This should contain only metadata which cannot be automatically determined from the code.
metadata = metadata_base.PrimitiveMetadata({
'id': '28d12214-8cb0-4ac0-8946-d31fcbcd4142',
'version': __metafeature_version__,
'version': __version__,
'name': 'Dataset Metafeature Extraction',
'source': {
'name': 'byu-dml',
Expand All @@ -80,7 +79,7 @@ class MetafeatureExtractor(FeaturizationTransformerPrimitiveBase[Inputs, Outputs
{
'type': metadata_base.PrimitiveInstallationType.PIP,
'package': 'byudml',
'version': __package_version__
'version': __version__,
}
],
'location_uris': [
Expand Down Expand Up @@ -179,7 +178,7 @@ def _set_implementation_fields(self, data_metafeatures, data_metafeatures_path):
if landmarking_name not in data_metafeatures:
primitive_field_path = [landmarking_name, 'primitive']
random_seed_field_path = [landmarking_name, 'random_seed']
primitive_field_val = {'id': self.metadata.query()['id'], 'version': __metafeature_version__, 'python_path': self.metadata.query()['python_path'], 'name': self.metadata.query()['name']}
primitive_field_val = {'id': self.metadata.query()['id'], 'version': __version__, 'python_path': self.metadata.query()['python_path'], 'name': self.metadata.query()['name']}
if 'digest' in self.metadata.query():
primitive_field_val['digest'] = self.metadata.query()['digest']
random_seed_field_val = self.random_seed
Expand Down
7 changes: 3 additions & 4 deletions byudml/profiler/profiler_primitive.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,7 @@
from d3m.metadata import base as metadata_base, hyperparams as hyperparams_module, params
from d3m.primitive_interfaces import base, unsupervised_learning

from byudml import __version__ as __package_version__
from byudml import __profiler_path__, __profiler_version__
from byudml import __profiler_path__, __version__

import common_primitives
from common_primitives import utils
Expand Down Expand Up @@ -196,7 +195,7 @@ class SemanticProfilerPrimitive(unsupervised_learning.UnsupervisedLearnerPrimiti
]
metadata = metadata_base.PrimitiveMetadata({
'id': 'af214333-e67b-4e59-a49b-b16f5501a925',
'version': __profiler_version__,
'version': __version__,
'name': 'Semantic Profiler',
'description': 'This primitive is an adapatation of the d3m common profiler (https://gitlab.com/datadrivendiscovery/common-primitives/-/blob/c170029e9a0f875af28c6b9af20adc90bd4df0bb/common_primitives/simple_profiler.py). It predicts semantic column types using a natural language embeddings of the the column name. The internal model uses these embeddings to predict the semantic types found in the dataset annotations created by MIT Lincoln Labs.',
'python_path': __profiler_path__,
Expand All @@ -211,7 +210,7 @@ class SemanticProfilerPrimitive(unsupervised_learning.UnsupervisedLearnerPrimiti
{
'type': metadata_base.PrimitiveInstallationType.PIP,
'package': 'byudml',
'version': __package_version__
'version': __version__,
},
] + _weights_configs,
'algorithm_types': [
Expand Down
74 changes: 5 additions & 69 deletions submission/pipelines/generate_pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,8 @@
from byudml.imputer.random_sampling_imputer import RandomSamplingImputer
from byudml.metafeature_extraction.metafeature_extraction import MetafeatureExtractor
from byudml.profiler.profiler_primitive import SemanticProfilerPrimitive
from byudml import (
__imputer_version__, __imputer_path__, __metafeature_version__, __metafeature_path__,
__profiler_version__, __profiler_path__
)
from byudml import __imputer_path__, __metafeature_path__, __profiler_path__, __version__

import sys
sys.path.append('.')
from submission.utils import (
Expand Down Expand Up @@ -708,68 +706,6 @@ def update_pipeline(
return pipeline_to_update


# TODO: do we still want this code?
# def add_best_pipelines(base_dir):
# """
# This function checks the best_pipelines.csv for the best pipelines for a dataset, prepares and updates it, and writes it to the submodule.
# It also checks how many pipelines beat MIT-LL and the EXlines.
# """
# mongo_client = pymongo.MongoClient(lab_hostname, real_mongo_port)

# imputer_version = None
# best_pipelines_df = pd.read_csv("submission/pipelines/best_pipelines.csv", index_col=0)

# beat_mit = 0
# beat_exlines = 0
# has_pipeline = 0
# for index, dataset in enumerate(best_pipelines_df):
# dataset_id = dataset.replace("_dataset", "", 1)
# if dataset_id not in list(seed_datasets_exlines.keys()):
# continue

# # grab the best pipeline
# pipelines = best_pipelines_df[dataset]
# best_pipeline_id = pipelines.idxmax()
# best_pipeline_score = pipelines.max()
# has_pipeline += 1

# # See how well we do compared to others
# problem_details = seed_datasets_exlines[dataset_id]
# if problem_details["problem"] == "accuracy":
# if problem_details["score"] <= best_pipeline_score:
# beat_exlines += 1
# if problem_details["mit-score"] <= best_pipeline_score:
# beat_mit += 1
# else:
# ## is regression
# if problem_details["score"] >= best_pipeline_score:
# beat_exlines += 1
# if problem_details["mit-score"] >= best_pipeline_score:
# beat_mit += 1

# # get the best pipeline and update it
# best_pipeline_json = get_pipeline_from_database(best_pipeline_id, mongo_client)
# del best_pipeline_json["_id"]
# no_digest_pipeline = remove_digests(best_pipeline_json)
# updated_pipeline = update_pipeline(no_digest_pipeline)

# # get directory to put new pipelines
# if imputer_version == None:
# IMPUTER_PIPELINE_PATH = os.path.join(base_dir, __imputer_path__, __imputer_version__, "pipelines/")

# print("Writing pipeline for dataset: {} to {}".format(dataset, IMPUTER_PIPELINE_PATH + best_pipeline_id + ".json"))
# with open(IMPUTER_PIPELINE_PATH + best_pipeline_id + ".json", "w") as file:
# file.write(json.dumps(updated_pipeline, indent=4))

# # TODO: Run the pipeline and save the pipeline run as well.


# print("############## RESULTS #################")
# print(beat_mit, " pipelines beat MIT")
# print(beat_exlines, " pipelines beat EXlines")
# print(has_pipeline, " pipelines for seed datasets")


def generate_and_update_primitive_pipeline(
primitive: PrimitiveBase,
pipeline_gen_f: Callable,
Expand Down Expand Up @@ -803,19 +739,19 @@ def main():
{
'primitive': RandomSamplingImputer,
'gen_method': generate_imputer_pipeline,
'version': __imputer_version__,
'version': __version__,
'primitive_simple_name': 'random_sampling_imputer',
},
{
'primitive': MetafeatureExtractor,
'gen_method': generate_metafeature_pipeline,
'version': __metafeature_version__,
'version': __version__,
'primitive_simple_name': 'metafeature_extractor',
},
{
'primitive': SemanticProfilerPrimitive,
'gen_method': generate_profiler_pipeline,
'version': __profiler_version__,
'version': __version__,
'primitive_simple_name': 'profiler'
},
]
Expand Down
12 changes: 5 additions & 7 deletions submission/primitive_jsons/generate_primitive_jsons.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,8 @@
from byudml.imputer.random_sampling_imputer import RandomSamplingImputer
from byudml.metafeature_extraction.metafeature_extraction import MetafeatureExtractor
from byudml.profiler.profiler_primitive import SemanticProfilerPrimitive
from byudml import (
__imputer_version__, __imputer_path__, __metafeature_version__, __metafeature_path__,
__profiler_version__, __profiler_path__
)
from byudml import __imputer_path__, __metafeature_path__, __profiler_path__, __version__

import sys
sys.path.append('.')
from submission.utils import get_new_d3m_path, clear_directory
Expand All @@ -30,9 +28,9 @@ def save_primitive_json(primitive, path):
byu_dir = get_new_d3m_path()
clear_directory(byu_dir)

IMPUTER_JSON_PATH = os.path.join(byu_dir, __imputer_path__, __imputer_version__, PRIMITIVE_JSON)
METAFEATURE_JSON_PATH = os.path.join(byu_dir, __metafeature_path__, __metafeature_version__, PRIMITIVE_JSON)
PROFILER_JSON_PATH = os.path.join(byu_dir, __profiler_path__, __profiler_version__, PRIMITIVE_JSON)
IMPUTER_JSON_PATH = os.path.join(byu_dir, __imputer_path__, __version__, PRIMITIVE_JSON)
METAFEATURE_JSON_PATH = os.path.join(byu_dir, __metafeature_path__, __version__, PRIMITIVE_JSON)
PROFILER_JSON_PATH = os.path.join(byu_dir, __profiler_path__, __version__, PRIMITIVE_JSON)

save_primitive_json(RandomSamplingImputer, IMPUTER_JSON_PATH)
save_primitive_json(MetafeatureExtractor, METAFEATURE_JSON_PATH)
Expand Down

0 comments on commit 447dd8a

Please sign in to comment.