From af88fccc15e371f771fc1e29c163d8df8358f074 Mon Sep 17 00:00:00 2001 From: Hadrien Mary Date: Fri, 27 Oct 2023 18:04:53 -0400 Subject: [PATCH] WIP --- docs/tutorials/design-with-safe.ipynb | 90 ++--- docs/tutorials/getting-started.ipynb | 536 +++++++++++--------------- safe/__init__.py | 2 - safe/_version.py | 14 - 4 files changed, 268 insertions(+), 374 deletions(-) delete mode 100644 safe/_version.py diff --git a/docs/tutorials/design-with-safe.ipynb b/docs/tutorials/design-with-safe.ipynb index 52fe8f9..3de96b7 100644 --- a/docs/tutorials/design-with-safe.ipynb +++ b/docs/tutorials/design-with-safe.ipynb @@ -7,47 +7,49 @@ "outputs": [], "source": [ "%load_ext autoreload\n", - "%autoreload 2" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "!!! info\n", - "\n", - " Pretrained checkpoint for autoregressive only models are available at : `gs://valence-experiments/safe/generative-model/`" + "%autoreload 2\n" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import os\n", - "os.environ[\"TOKENIZERS_PARALLELISM\"] = \"false\"" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ + "\n", + "\n", + "os.environ[\"TOKENIZERS_PARALLELISM\"] = \"false\"\n", + "\n", + "\n", "import safe as sf\n", - "import datamol as dm\n", - "from safe.trainer.data_utils import get_dataset\n", - "from safe import SAFEDesign" + "import datamol as dm\n" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "HFValidationError", + "evalue": "Repo id must be in the form 'repo_name' or 'namespace/repo_name': '/home/hadim/.cache/safe/default_model'. Use `repo_type` argument if needed.", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mHFValidationError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m/home/hadim/Code/valence/Libs/safe/docs/tutorials/design-with-safe.ipynb Cell 3\u001b[0m line \u001b[0;36m1\n\u001b[0;32m----> 1\u001b[0m designer \u001b[39m=\u001b[39m sf\u001b[39m.\u001b[39;49mSAFEDesign\u001b[39m.\u001b[39;49mload_default(verbose\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m)\n", + "File \u001b[0;32m~/Code/valence/Libs/safe/safe/sample.py:89\u001b[0m, in \u001b[0;36mSAFEDesign.load_default\u001b[0;34m(cls, verbose, model_dir, device)\u001b[0m\n\u001b[1;32m 87\u001b[0m \u001b[39mif\u001b[39;00m model_dir \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m \u001b[39mor\u001b[39;00m \u001b[39mnot\u001b[39;00m model_dir:\n\u001b[1;32m 88\u001b[0m model_dir \u001b[39m=\u001b[39m \u001b[39mcls\u001b[39m\u001b[39m.\u001b[39m_DEFAULT_MODEL_PATH\n\u001b[0;32m---> 89\u001b[0m model \u001b[39m=\u001b[39m SAFEDoubleHeadsModel\u001b[39m.\u001b[39;49mfrom_pretrained(model_dir)\n\u001b[1;32m 90\u001b[0m tokenizer \u001b[39m=\u001b[39m SAFETokenizer\u001b[39m.\u001b[39mload(os\u001b[39m.\u001b[39mpath\u001b[39m.\u001b[39mjoin(model_dir, \u001b[39m\"\u001b[39m\u001b[39mtokenizer.json\u001b[39m\u001b[39m\"\u001b[39m))\n\u001b[1;32m 91\u001b[0m gen_config \u001b[39m=\u001b[39m GenerationConfig\u001b[39m.\u001b[39mfrom_pretrained(model_dir)\n", + "File \u001b[0;32m~/local/micromamba/envs/safe/lib/python3.11/site-packages/transformers/modeling_utils.py:2507\u001b[0m, in \u001b[0;36mPreTrainedModel.from_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, config, cache_dir, ignore_mismatched_sizes, force_download, local_files_only, token, revision, use_safetensors, *model_args, **kwargs)\u001b[0m\n\u001b[1;32m 2504\u001b[0m \u001b[39mif\u001b[39;00m commit_hash \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m 2505\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39misinstance\u001b[39m(config, PretrainedConfig):\n\u001b[1;32m 2506\u001b[0m \u001b[39m# We make a call to the config file first (which may be absent) to get the commit hash as soon as possible\u001b[39;00m\n\u001b[0;32m-> 2507\u001b[0m resolved_config_file \u001b[39m=\u001b[39m cached_file(\n\u001b[1;32m 2508\u001b[0m pretrained_model_name_or_path,\n\u001b[1;32m 2509\u001b[0m CONFIG_NAME,\n\u001b[1;32m 2510\u001b[0m cache_dir\u001b[39m=\u001b[39;49mcache_dir,\n\u001b[1;32m 2511\u001b[0m force_download\u001b[39m=\u001b[39;49mforce_download,\n\u001b[1;32m 2512\u001b[0m resume_download\u001b[39m=\u001b[39;49mresume_download,\n\u001b[1;32m 2513\u001b[0m proxies\u001b[39m=\u001b[39;49mproxies,\n\u001b[1;32m 2514\u001b[0m local_files_only\u001b[39m=\u001b[39;49mlocal_files_only,\n\u001b[1;32m 2515\u001b[0m token\u001b[39m=\u001b[39;49mtoken,\n\u001b[1;32m 2516\u001b[0m revision\u001b[39m=\u001b[39;49mrevision,\n\u001b[1;32m 2517\u001b[0m subfolder\u001b[39m=\u001b[39;49msubfolder,\n\u001b[1;32m 2518\u001b[0m _raise_exceptions_for_missing_entries\u001b[39m=\u001b[39;49m\u001b[39mFalse\u001b[39;49;00m,\n\u001b[1;32m 2519\u001b[0m _raise_exceptions_for_connection_errors\u001b[39m=\u001b[39;49m\u001b[39mFalse\u001b[39;49;00m,\n\u001b[1;32m 2520\u001b[0m )\n\u001b[1;32m 2521\u001b[0m commit_hash \u001b[39m=\u001b[39m extract_commit_hash(resolved_config_file, commit_hash)\n\u001b[1;32m 2522\u001b[0m \u001b[39melse\u001b[39;00m:\n", + "File \u001b[0;32m~/local/micromamba/envs/safe/lib/python3.11/site-packages/transformers/utils/hub.py:429\u001b[0m, in \u001b[0;36mcached_file\u001b[0;34m(path_or_repo_id, filename, cache_dir, force_download, resume_download, proxies, token, revision, local_files_only, subfolder, repo_type, user_agent, _raise_exceptions_for_missing_entries, _raise_exceptions_for_connection_errors, _commit_hash, **deprecated_kwargs)\u001b[0m\n\u001b[1;32m 426\u001b[0m user_agent \u001b[39m=\u001b[39m http_user_agent(user_agent)\n\u001b[1;32m 427\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m 428\u001b[0m \u001b[39m# Load from URL or cache if already cached\u001b[39;00m\n\u001b[0;32m--> 429\u001b[0m resolved_file \u001b[39m=\u001b[39m hf_hub_download(\n\u001b[1;32m 430\u001b[0m path_or_repo_id,\n\u001b[1;32m 431\u001b[0m filename,\n\u001b[1;32m 432\u001b[0m subfolder\u001b[39m=\u001b[39;49m\u001b[39mNone\u001b[39;49;00m \u001b[39mif\u001b[39;49;00m \u001b[39mlen\u001b[39;49m(subfolder) \u001b[39m==\u001b[39;49m \u001b[39m0\u001b[39;49m \u001b[39melse\u001b[39;49;00m subfolder,\n\u001b[1;32m 433\u001b[0m repo_type\u001b[39m=\u001b[39;49mrepo_type,\n\u001b[1;32m 434\u001b[0m revision\u001b[39m=\u001b[39;49mrevision,\n\u001b[1;32m 435\u001b[0m cache_dir\u001b[39m=\u001b[39;49mcache_dir,\n\u001b[1;32m 436\u001b[0m user_agent\u001b[39m=\u001b[39;49muser_agent,\n\u001b[1;32m 437\u001b[0m force_download\u001b[39m=\u001b[39;49mforce_download,\n\u001b[1;32m 438\u001b[0m proxies\u001b[39m=\u001b[39;49mproxies,\n\u001b[1;32m 439\u001b[0m resume_download\u001b[39m=\u001b[39;49mresume_download,\n\u001b[1;32m 440\u001b[0m token\u001b[39m=\u001b[39;49mtoken,\n\u001b[1;32m 441\u001b[0m local_files_only\u001b[39m=\u001b[39;49mlocal_files_only,\n\u001b[1;32m 442\u001b[0m )\n\u001b[1;32m 443\u001b[0m \u001b[39mexcept\u001b[39;00m GatedRepoError \u001b[39mas\u001b[39;00m e:\n\u001b[1;32m 444\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mEnvironmentError\u001b[39;00m(\n\u001b[1;32m 445\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mYou are trying to access a gated repo.\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39mMake sure to request access at \u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 446\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mhttps://huggingface.co/\u001b[39m\u001b[39m{\u001b[39;00mpath_or_repo_id\u001b[39m}\u001b[39;00m\u001b[39m and pass a token having permission to this repo either \u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 447\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mby logging in with `huggingface-cli login` or by passing `token=`.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 448\u001b[0m ) \u001b[39mfrom\u001b[39;00m \u001b[39me\u001b[39;00m\n", + "File \u001b[0;32m~/local/micromamba/envs/safe/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py:110\u001b[0m, in \u001b[0;36mvalidate_hf_hub_args.._inner_fn\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 105\u001b[0m \u001b[39mfor\u001b[39;00m arg_name, arg_value \u001b[39min\u001b[39;00m chain(\n\u001b[1;32m 106\u001b[0m \u001b[39mzip\u001b[39m(signature\u001b[39m.\u001b[39mparameters, args), \u001b[39m# Args values\u001b[39;00m\n\u001b[1;32m 107\u001b[0m kwargs\u001b[39m.\u001b[39mitems(), \u001b[39m# Kwargs values\u001b[39;00m\n\u001b[1;32m 108\u001b[0m ):\n\u001b[1;32m 109\u001b[0m \u001b[39mif\u001b[39;00m arg_name \u001b[39min\u001b[39;00m [\u001b[39m\"\u001b[39m\u001b[39mrepo_id\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mfrom_id\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mto_id\u001b[39m\u001b[39m\"\u001b[39m]:\n\u001b[0;32m--> 110\u001b[0m validate_repo_id(arg_value)\n\u001b[1;32m 112\u001b[0m \u001b[39melif\u001b[39;00m arg_name \u001b[39m==\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mtoken\u001b[39m\u001b[39m\"\u001b[39m \u001b[39mand\u001b[39;00m arg_value \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m 113\u001b[0m has_token \u001b[39m=\u001b[39m \u001b[39mTrue\u001b[39;00m\n", + "File \u001b[0;32m~/local/micromamba/envs/safe/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py:158\u001b[0m, in \u001b[0;36mvalidate_repo_id\u001b[0;34m(repo_id)\u001b[0m\n\u001b[1;32m 155\u001b[0m \u001b[39mraise\u001b[39;00m HFValidationError(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mRepo id must be a string, not \u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mtype\u001b[39m(repo_id)\u001b[39m}\u001b[39;00m\u001b[39m: \u001b[39m\u001b[39m'\u001b[39m\u001b[39m{\u001b[39;00mrepo_id\u001b[39m}\u001b[39;00m\u001b[39m'\u001b[39m\u001b[39m.\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m 157\u001b[0m \u001b[39mif\u001b[39;00m repo_id\u001b[39m.\u001b[39mcount(\u001b[39m\"\u001b[39m\u001b[39m/\u001b[39m\u001b[39m\"\u001b[39m) \u001b[39m>\u001b[39m \u001b[39m1\u001b[39m:\n\u001b[0;32m--> 158\u001b[0m \u001b[39mraise\u001b[39;00m HFValidationError(\n\u001b[1;32m 159\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mRepo id must be in the form \u001b[39m\u001b[39m'\u001b[39m\u001b[39mrepo_name\u001b[39m\u001b[39m'\u001b[39m\u001b[39m or \u001b[39m\u001b[39m'\u001b[39m\u001b[39mnamespace/repo_name\u001b[39m\u001b[39m'\u001b[39m\u001b[39m:\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 160\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m \u001b[39m\u001b[39m'\u001b[39m\u001b[39m{\u001b[39;00mrepo_id\u001b[39m}\u001b[39;00m\u001b[39m'\u001b[39m\u001b[39m. Use `repo_type` argument if needed.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 161\u001b[0m )\n\u001b[1;32m 163\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m REPO_ID_REGEX\u001b[39m.\u001b[39mmatch(repo_id):\n\u001b[1;32m 164\u001b[0m \u001b[39mraise\u001b[39;00m HFValidationError(\n\u001b[1;32m 165\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mRepo id must use alphanumeric chars or \u001b[39m\u001b[39m'\u001b[39m\u001b[39m-\u001b[39m\u001b[39m'\u001b[39m\u001b[39m, \u001b[39m\u001b[39m'\u001b[39m\u001b[39m_\u001b[39m\u001b[39m'\u001b[39m\u001b[39m, \u001b[39m\u001b[39m'\u001b[39m\u001b[39m.\u001b[39m\u001b[39m'\u001b[39m\u001b[39m, \u001b[39m\u001b[39m'\u001b[39m\u001b[39m--\u001b[39m\u001b[39m'\u001b[39m\u001b[39m and \u001b[39m\u001b[39m'\u001b[39m\u001b[39m..\u001b[39m\u001b[39m'\u001b[39m\u001b[39m are\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 166\u001b[0m \u001b[39m\"\u001b[39m\u001b[39m forbidden, \u001b[39m\u001b[39m'\u001b[39m\u001b[39m-\u001b[39m\u001b[39m'\u001b[39m\u001b[39m and \u001b[39m\u001b[39m'\u001b[39m\u001b[39m.\u001b[39m\u001b[39m'\u001b[39m\u001b[39m cannot start or end the name, max length is 96:\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 167\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m \u001b[39m\u001b[39m'\u001b[39m\u001b[39m{\u001b[39;00mrepo_id\u001b[39m}\u001b[39;00m\u001b[39m'\u001b[39m\u001b[39m.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 168\u001b[0m )\n", + "\u001b[0;31mHFValidationError\u001b[0m: Repo id must be in the form 'repo_name' or 'namespace/repo_name': '/home/hadim/.cache/safe/default_model'. Use `repo_type` argument if needed." + ] + } + ], "source": [ - "designer = SAFEDesign.load_default(verbose=True)" + "designer = sf.SAFEDesign.load_default(verbose=True)\n" ] }, { @@ -182,7 +184,7 @@ } ], "source": [ - "dm.to_image(dm.to_mol(candidate_mol))" + "dm.to_image(dm.to_mol(candidate_mol))\n" ] }, { @@ -191,7 +193,7 @@ "metadata": {}, "outputs": [], "source": [ - "N_SAMPLES = 100" + "N_SAMPLES = 100\n" ] }, { @@ -217,7 +219,7 @@ } ], "source": [ - "generated = designer.de_novo_generation(sanitize=True, n_samples_per_trial=N_SAMPLES)" + "generated = designer.de_novo_generation(sanitize=True, n_samples_per_trial=N_SAMPLES)\n" ] }, { @@ -1631,7 +1633,7 @@ } ], "source": [ - "dm.to_image(generated[:20])" + "dm.to_image(generated[:20])\n" ] }, { @@ -1708,7 +1710,7 @@ } ], "source": [ - "dm.to_image(scaffold)" + "dm.to_image(scaffold)\n" ] }, { @@ -1725,7 +1727,7 @@ } ], "source": [ - "generated = designer.scaffold_decoration(scaffold=scaffold, n_samples_per_trial=N_SAMPLES, n_trials=2, sanitize=True, do_not_fragment_further=True)" + "generated = designer.scaffold_decoration(scaffold=scaffold, n_samples_per_trial=N_SAMPLES, n_trials=2, sanitize=True, do_not_fragment_further=True)\n" ] }, { @@ -5994,7 +5996,7 @@ } ], "source": [ - "dm.viz.lasso_highlight_image([dm.to_mol(x) for x in generated[:20]], dm.from_smarts(scaffold))" + "dm.viz.lasso_highlight_image([dm.to_mol(x) for x in generated[:20]], dm.from_smarts(scaffold))\n" ] }, { @@ -6059,7 +6061,7 @@ } ], "source": [ - "dm.to_image(superstructure)" + "dm.to_image(superstructure)\n" ] }, { @@ -6077,7 +6079,7 @@ ], "source": [ "generated = designer.super_structure(core=superstructure, n_samples_per_trial=N_SAMPLES, n_trials=1, sanitize=True, do_not_fragment_further=False, attachment_point_depth=3)\n", - "#generated" + "#generated\n" ] }, { @@ -7225,7 +7227,7 @@ } ], "source": [ - "dm.to_image(generated[:20])" + "dm.to_image(generated[:20])\n" ] }, { @@ -7277,7 +7279,7 @@ } ], "source": [ - "dm.to_image(motif)" + "dm.to_image(motif)\n" ] }, { @@ -7295,7 +7297,7 @@ ], "source": [ "# let's make some long sequence\n", - "generated = designer.motif_extension(motif=motif, n_samples_per_trial=N_SAMPLES, n_trials=1, sanitize=True, do_not_fragment_further=False, min_length=25, max_length=80)" + "generated = designer.motif_extension(motif=motif, n_samples_per_trial=N_SAMPLES, n_trials=1, sanitize=True, do_not_fragment_further=False, min_length=25, max_length=80)\n" ] }, { @@ -8451,7 +8453,7 @@ } ], "source": [ - "dm.to_image(generated[:20])" + "dm.to_image(generated[:20])\n" ] }, { @@ -8541,7 +8543,7 @@ } ], "source": [ - "dm.to_image(side_chains)" + "dm.to_image(side_chains)\n" ] }, { @@ -10006,7 +10008,7 @@ ], "source": [ "generated = designer.scaffold_morphing(side_chains=side_chains, n_samples_per_trial=N_SAMPLES, n_trials=1, sanitize=True, do_not_fragment_further=False, random_seed=100)\n", - "dm.to_image(generated[:20])" + "dm.to_image(generated[:20])\n" ] }, { @@ -10112,7 +10114,7 @@ } ], "source": [ - "dm.to_image(linker_generation)" + "dm.to_image(linker_generation)\n" ] }, { @@ -12320,7 +12322,7 @@ ], "source": [ "generated = designer.linker_generation(*linker_generation, n_samples_per_trial=N_SAMPLES, n_trials=1, sanitize=True, do_not_fragment_further=False, random_seed=100)\n", - "dm.to_image(generated[:20])" + "dm.to_image(generated[:20])\n" ] } ], @@ -12340,7 +12342,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.4" + "version": "3.11.6" }, "orig_nbformat": 4 }, diff --git a/docs/tutorials/getting-started.ipynb b/docs/tutorials/getting-started.ipynb index 1a1cb2a..d12b1fc 100644 --- a/docs/tutorials/getting-started.ipynb +++ b/docs/tutorials/getting-started.ipynb @@ -2,12 +2,21 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The autoreload extension is already loaded. To reload it, use:\n", + " %reload_ext autoreload\n" + ] + } + ], "source": [ "%load_ext autoreload\n", - "%autoreload 2" + "%autoreload 2\n" ] }, { @@ -110,7 +119,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -119,77 +128,77 @@ "\n", "\n", " \n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", "" ], "text/plain": [ @@ -206,7 +215,7 @@ "\n", "celecoxib = \"Cc1ccc(-c2cc(C(F)(F)F)nn2-c2ccc(S(N)(=O)=O)cc2)cc1\"\n", "celecoxib_mol = dm.to_mol(celecoxib)\n", - "display(dm.to_image(celecoxib_mol))" + "display(dm.to_image(celecoxib_mol))\n" ] }, { @@ -216,22 +225,22 @@ "source": [ "#### Encoding\n", "\n", - "!!! info \"SAFE represents fragments\"\n", + "**SAFE represents fragments**\n", "\n", - " SAFE represents molecules as a set of N [Fragment_1].[Fragment_i].[Fragment_N]\n", + "SAFE represents molecules as a set of N [Fragment_1].[Fragment_i].[Fragment_N]\n", " " ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "c14ccc(S(N)(=O)=O)cc1.Cc1ccc3cc1.c13cc5nn14.C5(F)(F)F\n", + "c13ccc(S(N)(=O)=O)cc1.Cc1ccc4cc1.c14cc5nn13.C5(F)(F)F\n", "Representation using 4 fragments\n" ] } @@ -239,7 +248,7 @@ "source": [ "safe_str = sf.encode(celecoxib_mol)\n", "print(safe_str)\n", - "print(f\"Representation using {len(safe_str.split('.'))} fragments\")" + "print(f\"Representation using {len(safe_str.split('.'))} fragments\")\n" ] }, { @@ -247,14 +256,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "!!! info \"SAFE string are SMILES\"\n", + "**SAFE string are SMILES**\n", "\n", - " Any SAFE string is a valid SMILES and can be read by RDKit without any decoding trick." + "Any SAFE string is a valid SMILES and can be read by RDKit without any decoding trick." ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -263,77 +272,77 @@ "\n", "\n", " \n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", "" ], "text/plain": [ @@ -349,7 +358,7 @@ "True" ] }, - "execution_count": 4, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -357,7 +366,7 @@ "source": [ "reconstructed = dm.to_mol(safe_str)\n", "display(dm.to_image(reconstructed))\n", - "dm.same_mol(celecoxib_mol, reconstructed)" + "dm.same_mol(celecoxib_mol, reconstructed)\n" ] }, { @@ -365,21 +374,21 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "!!! info \"SAFE supports randomization\"\n", + "**SAFE supports randomization**\n", "\n", - " You can generate randomized SAFE strings." + "You can generate randomized SAFE strings." ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "C(F)(F)(F)4.c1cc(S(=O)(=O)N)ccc12.c13ccc(C)cc1.n12nc4cc13\n" + "C6(F)(F)F.c15ccc(C)cc1.c15cc6nn14.c14ccc(S(N)(=O)=O)cc1\n" ] }, { @@ -388,7 +397,7 @@ "True" ] }, - "execution_count": 5, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -397,7 +406,7 @@ "random_safe_str = sf.encode(celecoxib_mol, canonical=False, randomize=True)\n", "print(random_safe_str)\n", "reconstructed = dm.to_mol(safe_str)\n", - "dm.same_mol(celecoxib_mol, reconstructed)" + "dm.same_mol(celecoxib_mol, reconstructed)\n" ] }, { @@ -405,21 +414,21 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "!!! info \"Fragment order in SAFE does not matter\"\n", + "**Fragment order in SAFE does not matter**\n", "\n", - " Any permutation of the fragment order in a SAFE string preserve the molecule identity" + "Any permutation of the fragment order in a SAFE string preserve the molecule identity" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "c14ccc(S(N)(=O)=O)cc1.c13cc5nn14.Cc1ccc3cc1.C5(F)(F)F c14ccc(S(N)(=O)=O)cc1.Cc1ccc3cc1.c13cc5nn14.C5(F)(F)F\n" + "c13ccc(S(N)(=O)=O)cc1.C5(F)(F)F.Cc1ccc4cc1.c14cc5nn13 c13ccc(S(N)(=O)=O)cc1.Cc1ccc4cc1.c14cc5nn13.C5(F)(F)F\n" ] }, { @@ -428,18 +437,20 @@ "True" ] }, - "execution_count": 6, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import numpy as np\n", + "\n", "fragments = safe_str.split(\".\")\n", "randomized_fragment_safe_str = np.random.permutation(fragments).tolist()\n", "randomized_fragment_safe_str = \".\".join(randomized_fragment_safe_str)\n", + "\n", "print(randomized_fragment_safe_str, safe_str)\n", - "dm.same_mol(celecoxib_mol, randomized_fragment_safe_str)" + "dm.same_mol(celecoxib_mol, randomized_fragment_safe_str)\n" ] }, { @@ -447,128 +458,34 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "!!! info \"Use your own slicing logic\"\n", + "**Use your own slicing logic**\n", " \n", - " By default SAFE strings are generated using `BRICS`, however, the following are supported:\n", - " * [Hussain-Rea (`hr`)](https://pubs.acs.org/doi/10.1021/ci900450m)\n", - " * [RECAP (`recap`)](https://pubmed.ncbi.nlm.nih.gov/9611787/)\n", - " * [RDKit's MMPA (`mmpa`)](https://www.rdkit.org/docs/source/rdkit.Chem.rdMMPA.html)\n", - " * Any possible attachment points (`attach`)\n", - " \n", - " Furthermore, you can also provide your own slicing algorithm, which should return a pair of atoms corresponding to the bonds to break. \n", - "\n" + "By default SAFE strings are generated using `BRICS`, however, the following are supported:\n", + "\n", + "* [Hussain-Rea (`hr`)](https://pubs.acs.org/doi/10.1021/ci900450m)\n", + "* [RECAP (`recap`)](https://pubmed.ncbi.nlm.nih.gov/9611787/)\n", + "* [RDKit's MMPA (`mmpa`)](https://www.rdkit.org/docs/source/rdkit.Chem.rdMMPA.html)\n", + "* Any possible attachment points (`attach`)\n", + " \n", + "Furthermore, you can also provide your own slicing algorithm, which should return a pair of atoms corresponding to the bonds to break. " ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 16, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "C%19%20%17%24.c1%19c9c%15c%25c8c1%14.c1%25c5c%12nn1%26.C%18%217%12.F%18.F7.F%21.c1%26c%27c%22c4c%16c1%11.S=3=%1046.N%236%13.O=3.O=%10.[H]%17.[H]%24.[H]%20.[H]9.[H]%15.[H]5.[H]%27.[H]%22.[H]%13.[H]%23.[H]%16.[H]%11.[H]8.[H]%14\n", - "Representation using 26 fragments\n" - ] - }, - { - "data": { - "image/svg+xml": [ - "\n", - "\n", - " \n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ - "safe_str = sf.encode(celecoxib_mol, canonical=False, slicer=\"attach\")\n", - "print(safe_str)\n", - "print(f\"Representation using {len(safe_str.split('.'))} fragments\")\n", - "display(dm.to_image(safe_str))" + "# safe_str = sf.encode(celecoxib_mol, canonical=False, slicer=\"attach\")\n", + "\n", + "# print(safe_str)\n", + "# print(f\"Representation using {len(safe_str.split('.'))} fragments\")\n", + "# display(dm.to_image(safe_str))\n" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -576,32 +493,23 @@ " \"\"\"Slice on non single bonds where at both atoms are in a distinct rings\"\"\"\n", " for bond in mol.GetBonds():\n", " if bond.GetBondType() == dm.SINGLE_BOND and not bond.IsInRing() and (bond.GetBeginAtom().IsInRing() and bond.GetEndAtom().IsInRing()):\n", - " yield (bond.GetBeginAtomIdx(), bond.GetEndAtomIdx())" + " yield (bond.GetBeginAtomIdx(), bond.GetEndAtomIdx())\n" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 19, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "c14cc(C(F)(F)F)nn13.c13ccc(S(N)(=O)=O)cc1.Cc1ccc4cc1\n", - "Representation using 3 fragments\n" - ] - } - ], + "outputs": [], "source": [ - "safe_str = sf.encode(celecoxib_mol, canonical=True, slicer=my_slicer)\n", - "print(safe_str)\n", - "print(f\"Representation using {len(safe_str.split('.'))} fragments\")" + "# safe_str = sf.encode(celecoxib_mol, canonical=True, slicer=my_slicer)\n", + "# print(safe_str)\n", + "# print(f\"Representation using {len(safe_str.split('.'))} fragments\")\n" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -618,7 +526,7 @@ "smart_slicer = [\"[r]-;!@[r]\"]\n", "safe_str = sf.encode(celecoxib_mol, canonical=True, slicer=smart_slicer)\n", "print(safe_str)\n", - "print(f\"Representation using {len(safe_str.split('.'))} fragments\")" + "print(f\"Representation using {len(safe_str.split('.'))} fragments\")\n" ] }, { @@ -651,7 +559,7 @@ ], "source": [ "safe_fragment = safe_str.split(\".\")\n", - "safe_fragment" + "safe_fragment\n" ] }, { @@ -669,7 +577,7 @@ ], "source": [ "# the following will fail\n", - "dm.to_mol(safe_fragment[0])" + "dm.to_mol(safe_fragment[0])\n" ] }, { @@ -691,7 +599,7 @@ ], "source": [ "# while this works\n", - "sf.decode(safe_fragment[0], as_mol=True)" + "sf.decode(safe_fragment[0], as_mol=True)\n" ] }, { @@ -713,7 +621,7 @@ ], "source": [ "# if you want to keep the attachment points, then use remove_dummies=False\n", - "sf.decode(safe_fragment[0], as_mol=True, remove_dummies=False)" + "sf.decode(safe_fragment[0], as_mol=True, remove_dummies=False)\n" ] }, { @@ -1012,7 +920,7 @@ } ], "source": [ - "sf.to_image(safe_str)" + "sf.to_image(safe_str)\n" ] }, { @@ -1047,7 +955,7 @@ " image_fill = widgets.Image(value=sf.to_image(safe_str, highlight_mode=\"fill\", legend=\"fill mode\").data.encode(), format='svg+xml')\n", " image_color = widgets.Image(value=sf.to_image(safe_str, highlight_mode=\"color\", legend=\"color mode\").data.encode(), format='svg+xml')\n", " hbox = HBox([image_lasso, image_fill, image_color])\n", - " display(hbox)" + " display(hbox)\n" ] }, { @@ -1073,7 +981,7 @@ "source": [ "# display for brics\n", "safe_str_brics = sf.encode(celecoxib_mol, canonical=True, slicer=\"brics\")\n", - "display_image(safe_str_brics)" + "display_image(safe_str_brics)\n" ] }, { @@ -1099,7 +1007,7 @@ "source": [ "# display with HR\n", "safe_str_hr = sf.encode(celecoxib_mol, canonical=True, slicer=\"mmpa\")\n", - "display_image(safe_str_hr)" + "display_image(safe_str_hr)\n" ] }, { @@ -1139,7 +1047,7 @@ "from rdkit import Chem\n", "from rdkit.Chem.Draw import rdDepictor\n", "from rdkit.Chem import rdChemReactions as rdr\n", - "rdDepictor.SetPreferCoordGen(True)" + "rdDepictor.SetPreferCoordGen(True)\n" ] }, { @@ -1281,7 +1189,7 @@ "source": [ "smiles = ['c1ccccc1', 'OC', 'c1cc(*)ccc1', 'O(*)C', 'c1cc(*)ccc1.O(*)C']\n", "legends = ['benzene', 'methanol', 'phenyl group', 'Methoxy group', 'composite']\n", - "dm.viz.to_image([dm.to_mol(x) for x in smiles], legends=legends, n_cols=len(smiles), use_svg=True)" + "dm.viz.to_image([dm.to_mol(x) for x in smiles], legends=legends, n_cols=len(smiles), use_svg=True)\n" ] }, { @@ -1368,7 +1276,7 @@ ], "source": [ "smiles = [ 'c1cc(*)ccc1.O(*)C', 'c1cc([*:1])ccc1.O([*:1])C'] #\n", - "dm.viz.to_image([dm.to_mol(x) for x in smiles], n_cols=len(smiles), use_svg=True)" + "dm.viz.to_image([dm.to_mol(x) for x in smiles], n_cols=len(smiles), use_svg=True)\n" ] }, { @@ -1398,7 +1306,7 @@ ], "source": [ "rxn = rdr.ReactionFromSmarts(\"[1*][*:1].[1*][*:2]>>[*:1][*:2]\")\n", - "rxn" + "rxn\n" ] }, { @@ -1425,7 +1333,7 @@ "\n", "# runreactions\n", "prod = rxn.RunReactants((dm.to_mol(phenyl), dm.to_mol(methoxy)))\n", - "prod[0][0]" + "prod[0][0]\n" ] }, { @@ -1464,7 +1372,7 @@ "replacement_sub = Chem.MolFromSmarts(\"[1*]\")\n", "prod = Chem.ReplaceSubstructs(dm.to_mol(phenyl), replacement_sub, dm.to_mol(methoxy), replacementConnectionPoint=0)\n", "prod = dm.remove_dummies(prod[0], dummy=\"[1*]\")\n", - "prod" + "prod\n" ] }, { @@ -1486,7 +1394,7 @@ "phenyl = \"c1cc([*:1])ccc1\"\n", "methoxy = \"O([*:1])C\"\n", "composite = phenyl + \".\" + methoxy # c1cc([*:1])ccc1.O([*:1])C\n", - "compo = dm.to_mol(composite)" + "compo = dm.to_mol(composite)\n" ] }, { @@ -1513,7 +1421,7 @@ ], "source": [ "attached_composite = composite.replace(\"[*:1]\", \"2\")\n", - "dm.to_mol(attached_composite)" + "dm.to_mol(attached_composite)\n" ] }, { @@ -1620,7 +1528,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.4" + "version": "3.11.6" }, "orig_nbformat": 4 }, diff --git a/safe/__init__.py b/safe/__init__.py index c7926c6..efb8973 100644 --- a/safe/__init__.py +++ b/safe/__init__.py @@ -1,5 +1,3 @@ -from ._version import __version__ - from .converter import encode from .converter import decode from .converter import SAFEConverter diff --git a/safe/_version.py b/safe/_version.py deleted file mode 100644 index 9dfcb0f..0000000 --- a/safe/_version.py +++ /dev/null @@ -1,14 +0,0 @@ -try: - from importlib.metadata import version - from importlib.metadata import PackageNotFoundError -except ModuleNotFoundError: - # Try backported to PY<38 `importlib_metadata`. - from importlib_metadata import version - from importlib_metadata import PackageNotFoundError - - -try: - __version__ = "0.0.2" -except PackageNotFoundError: - # package is not installed - __version__ = "0.0.2"