Unify the version number: the package and all primitives now share a single `__version__`

byu-dml · Nov 24, 2020 · 447dd8a
1 parent efe5586
commit 447dd8a
Show file tree

Hide file tree

Showing 6 changed files with 25 additions and 97 deletions.
diff --git a/byudml/__init__.py b/byudml/__init__.py
@@ -1,7 +1,4 @@
-__version__ = '0.7.4'
-__metafeature_version__ = '0.4.6'
+__version__ = '0.7.5'
 __metafeature_path__ = 'd3m.primitives.metalearning.metafeature_extractor.BYU'
-__imputer_version__ = '0.2.2'
 __imputer_path__ = 'd3m.primitives.data_preprocessing.random_sampling_imputer.BYU'
-__profiler_version__ = '0.0.4'
 __profiler_path__ = 'd3m.primitives.schema_discovery.profiler.BYU'
diff --git a/byudml/imputer/random_sampling_imputer.py b/byudml/imputer/random_sampling_imputer.py
@@ -6,8 +6,7 @@
 from d3m.primitive_interfaces.base import CallResult
 from d3m.primitive_interfaces.unsupervised_learning import UnsupervisedLearnerPrimitiveBase
 
-from byudml import __imputer_path__, __imputer_version__
-from byudml import __version__ as __package_version__
+from byudml import __imputer_path__, __version__
 
 
 Inputs = container.pandas.DataFrame
@@ -43,33 +42,33 @@ class RandomSamplingImputer(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Pa
 
     metadata = metadata_base.PrimitiveMetadata({
         'id': 'ebfeb6f0-e366-4082-b1a7-602fd50acc96',
-        'version': __imputer_version__,
+        'version': __version__,
         'name': 'Random Sampling Imputer',
         'source': {
             'name': 'byu-dml',
             'contact': 'mailto:[email protected]',
             'uris': [
-                'https://github.com/byu-dml/d3m-primitives'
+                'https://github.com/byu-dml/d3m-primitives',
             ]
         },
         'installation': [
             {
                 'type': metadata_base.PrimitiveInstallationType.PIP,
                 'package': 'byudml',
-                'version': __package_version__
+                'version': __version__,
             }
         ],
         'location_uris': [
-            'https://github.com/byu-dml/d3m-primitives/blob/master/byudml/imputer/random_sampling_imputer.py'
+            'https://github.com/byu-dml/d3m-primitives/blob/master/byudml/imputer/random_sampling_imputer.py',
         ],
         'python_path': __imputer_path__,
         'primitive_family': metadata_base.PrimitiveFamily.DATA_PREPROCESSING,
         'algorithm_types': [
-            metadata_base.PrimitiveAlgorithmType.IMPUTATION
+            metadata_base.PrimitiveAlgorithmType.IMPUTATION,
         ],
         'effects': [
             # not the case if empty columns are just ignored
-            metadata_base.PrimitiveEffect.NO_MISSING_VALUES
+            metadata_base.PrimitiveEffect.NO_MISSING_VALUES,
         ]
     })
 

diff --git a/byudml/metafeature_extraction/metafeature_extraction.py b/byudml/metafeature_extraction/metafeature_extraction.py
@@ -13,8 +13,7 @@
 from metalearn.metafeatures.metafeatures import Metafeatures
 import metalearn.metafeatures.constants as mf_consts
 
-from byudml import __version__ as __package_version__
-from byudml import __metafeature_path__, __metafeature_version__
+from byudml import __metafeature_path__, __version__
 
 
 Inputs = DataFrame
@@ -67,7 +66,7 @@ class MetafeatureExtractor(FeaturizationTransformerPrimitiveBase[Inputs, Outputs
     # This should contain only metadata which cannot be automatically determined from the code.
     metadata = metadata_base.PrimitiveMetadata({
         'id': '28d12214-8cb0-4ac0-8946-d31fcbcd4142',
-        'version': __metafeature_version__,
+        'version': __version__,
         'name': 'Dataset Metafeature Extraction',
         'source': {
             'name': 'byu-dml',
@@ -80,7 +79,7 @@ class MetafeatureExtractor(FeaturizationTransformerPrimitiveBase[Inputs, Outputs
             {
                 'type': metadata_base.PrimitiveInstallationType.PIP,
                 'package': 'byudml',
-                'version': __package_version__
+                'version': __version__,
             }
         ],
         'location_uris': [
@@ -179,7 +178,7 @@ def _set_implementation_fields(self, data_metafeatures, data_metafeatures_path):
         if landmarking_name not in data_metafeatures:
             primitive_field_path = [landmarking_name, 'primitive']
             random_seed_field_path = [landmarking_name, 'random_seed']
-            primitive_field_val = {'id': self.metadata.query()['id'], 'version': __metafeature_version__, 'python_path': self.metadata.query()['python_path'], 'name': self.metadata.query()['name']}
+            primitive_field_val = {'id': self.metadata.query()['id'], 'version': __version__, 'python_path': self.metadata.query()['python_path'], 'name': self.metadata.query()['name']}
             if 'digest' in self.metadata.query():
                 primitive_field_val['digest'] = self.metadata.query()['digest']
             random_seed_field_val = self.random_seed

diff --git a/byudml/profiler/profiler_primitive.py b/byudml/profiler/profiler_primitive.py
@@ -22,8 +22,7 @@
 from d3m.metadata import base as metadata_base, hyperparams as hyperparams_module, params
 from d3m.primitive_interfaces import base, unsupervised_learning
 
-from byudml import __version__ as __package_version__
-from byudml import __profiler_path__, __profiler_version__
+from byudml import __profiler_path__, __version__
 
 import common_primitives
 from common_primitives import utils
@@ -196,7 +195,7 @@ class SemanticProfilerPrimitive(unsupervised_learning.UnsupervisedLearnerPrimiti
     ]
     metadata = metadata_base.PrimitiveMetadata({
         'id': 'af214333-e67b-4e59-a49b-b16f5501a925',
-        'version': __profiler_version__,
+        'version': __version__,
         'name': 'Semantic Profiler',
         'description': 'This primitive is an adapatation of the d3m common profiler (https://gitlab.com/datadrivendiscovery/common-primitives/-/blob/c170029e9a0f875af28c6b9af20adc90bd4df0bb/common_primitives/simple_profiler.py). It predicts semantic column types using a natural language embeddings of the the column name. The internal model uses these embeddings to predict the semantic types found in the dataset annotations created by MIT Lincoln Labs.',
         'python_path': __profiler_path__,
@@ -211,7 +210,7 @@ class SemanticProfilerPrimitive(unsupervised_learning.UnsupervisedLearnerPrimiti
             {
                 'type': metadata_base.PrimitiveInstallationType.PIP,
                 'package': 'byudml',
-                'version': __package_version__
+                'version': __version__,
             },
         ] + _weights_configs,
         'algorithm_types': [

diff --git a/submission/pipelines/generate_pipelines.py b/submission/pipelines/generate_pipelines.py
@@ -15,10 +15,8 @@
 from byudml.imputer.random_sampling_imputer import RandomSamplingImputer
 from byudml.metafeature_extraction.metafeature_extraction import MetafeatureExtractor
 from byudml.profiler.profiler_primitive import SemanticProfilerPrimitive
-from byudml import (
-    __imputer_version__, __imputer_path__,  __metafeature_version__,  __metafeature_path__,
-    __profiler_version__, __profiler_path__
-)
+from byudml import __imputer_path__,  __metafeature_path__, __profiler_path__, __version__
+
 import sys
 sys.path.append('.')
 from submission.utils import (
@@ -708,68 +706,6 @@ def update_pipeline(
     return pipeline_to_update
 
 
-# TODO: do we still want this code?
-# def add_best_pipelines(base_dir):
-#     """
-#     This function checks the best_pipelines.csv for the best pipelines for a dataset, prepares and updates it, and writes it to the submodule.
-#     It also check how many pipelines beat MIT-LL and the EXlines.
-#     """
-#     mongo_client = pymongo.MongoClient(lab_hostname, real_mongo_port)
-
-#     imputer_version = None
-#     best_pipelines_df = pd.read_csv("submission/pipelines/best_pipelines.csv", index_col=0)
-
-#     beat_mit = 0
-#     beat_exlines = 0
-#     has_pipeline = 0
-#     for index, dataset in enumerate(best_pipelines_df):
-#         dataset_id = dataset.replace("_dataset", "", 1)
-#         if dataset_id not in list(seed_datasets_exlines.keys()):
-#             continue
-
-#         # grab the best pipeline
-#         pipelines = best_pipelines_df[dataset]
-#         best_pipeline_id = pipelines.idxmax()
-#         best_pipeline_score = pipelines.max()
-#         has_pipeline += 1
-
-#         # See how well we do compared to others
-#         problem_details = seed_datasets_exlines[dataset_id]
-#         if problem_details["problem"] == "accuracy":
-#             if problem_details["score"] <= best_pipeline_score:
-#                 beat_exlines += 1
-#             if problem_details["mit-score"] <= best_pipeline_score:
-#                 beat_mit += 1
-#         else:
-#             ## is regression
-#             if problem_details["score"] >= best_pipeline_score:
-#                 beat_exlines += 1
-#             if problem_details["mit-score"] >= best_pipeline_score:
-#                 beat_mit += 1
-
-#         # get the best pipeline and update it
-#         best_pipeline_json = get_pipeline_from_database(best_pipeline_id, mongo_client)
-#         del best_pipeline_json["_id"]
-#         no_digest_pipeline = remove_digests(best_pipeline_json)
-#         updated_pipeline = update_pipeline(no_digest_pipeline)
-
-#         # get directory to put new pipelines
-#         if imputer_version == None:
-#             IMPUTER_PIPELINE_PATH = os.path.join(base_dir, __imputer_path__, __imputer_version__, "pipelines/")
-
-#         print("Writing pipeline for dataset: {} to {}".format(dataset, IMPUTER_PIPELINE_PATH + best_pipeline_id + ".json"))
-#         with open(IMPUTER_PIPELINE_PATH + best_pipeline_id + ".json", "w") as file:
-#             file.write(json.dumps(updated_pipeline, indent=4))
-
-#         # TODO: Run the pipeline and save the pipeline run as well.
-
-
-#     print("############## RESULTS #################")
-#     print(beat_mit, " pipelines beat MIT")
-#     print(beat_exlines, " pipelines beat EXlines")
-#     print(has_pipeline, " pipelines for seed datasets")
-
-
 def generate_and_update_primitive_pipeline(
     primitive: PrimitiveBase,
     pipeline_gen_f: Callable,
@@ -803,19 +739,19 @@ def main():
         {
             'primitive': RandomSamplingImputer,
             'gen_method': generate_imputer_pipeline,
-            'version': __imputer_version__,
+            'version': __version__,
             'primitive_simple_name': 'random_sampling_imputer',
         },
         {
             'primitive': MetafeatureExtractor,
             'gen_method': generate_metafeature_pipeline,
-            'version': __metafeature_version__,
+            'version': __version__,
             'primitive_simple_name': 'metafeature_extractor',
         },
         {
             'primitive': SemanticProfilerPrimitive,
             'gen_method': generate_profiler_pipeline,
-            'version': __profiler_version__,
+            'version': __version__,
             'primitive_simple_name': 'profiler'
         },
     ]

diff --git a/submission/primitive_jsons/generate_primitive_jsons.py b/submission/primitive_jsons/generate_primitive_jsons.py
@@ -4,10 +4,8 @@
 from byudml.imputer.random_sampling_imputer import RandomSamplingImputer
 from byudml.metafeature_extraction.metafeature_extraction import MetafeatureExtractor
 from byudml.profiler.profiler_primitive import SemanticProfilerPrimitive
-from byudml import (
-    __imputer_version__, __imputer_path__,  __metafeature_version__,  __metafeature_path__,
-    __profiler_version__, __profiler_path__
-)
+from byudml import __imputer_path__, __metafeature_path__, __profiler_path__, __version__
+
 import sys
 sys.path.append('.')
 from submission.utils import get_new_d3m_path, clear_directory
@@ -30,9 +28,9 @@ def save_primitive_json(primitive, path):
 byu_dir = get_new_d3m_path()
 clear_directory(byu_dir)
 
-IMPUTER_JSON_PATH = os.path.join(byu_dir, __imputer_path__, __imputer_version__, PRIMITIVE_JSON)
-METAFEATURE_JSON_PATH = os.path.join(byu_dir, __metafeature_path__, __metafeature_version__, PRIMITIVE_JSON)
-PROFILER_JSON_PATH = os.path.join(byu_dir, __profiler_path__, __profiler_version__, PRIMITIVE_JSON)
+IMPUTER_JSON_PATH = os.path.join(byu_dir, __imputer_path__, __version__, PRIMITIVE_JSON)
+METAFEATURE_JSON_PATH = os.path.join(byu_dir, __metafeature_path__, __version__, PRIMITIVE_JSON)
+PROFILER_JSON_PATH = os.path.join(byu_dir, __profiler_path__, __version__, PRIMITIVE_JSON)
 
 save_primitive_json(RandomSamplingImputer, IMPUTER_JSON_PATH)
 save_primitive_json(MetafeatureExtractor, METAFEATURE_JSON_PATH)