diff --git a/analysis.ipynb b/analysis.ipynb
new file mode 100644
index 0000000..4f1e6b7
--- /dev/null
+++ b/analysis.ipynb
@@ -0,0 +1,560 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "March 6, 2022\n",
+    "\n",
+    "**Analysis**\n",
+    "## Identifying science in the news: An assessment of the precision and recall of Altmetric.com news mention data\n",
+    "\n",
+    "_Juan Pablo Alperin, ScholCommLab/School of Publishing, Simon Fraser University_\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Related Publication:**\n",
+    "\n",
+    "Fleerackers, A., Nehring, L., Maggio, L.A., Enkhbayar, A., Moorhead, L., Alperin, J.P. (2022). Identifying science in the news: An assessment of the precision and recall of Altmetric.com news mention data. _arXiv_\n",
+    "\n",
+    "\n",
+    "**Related Data:**\n",
+    "\n",
+    "Fleerackers, Alice; Nehring, Lise; Alperin, Juan Pablo; Enkhbayar, Asura; Maggio, Lauren A.; Moorhead, Laura, 2022, \"Replication data for Identifying science in the news\", [https://doi.org/10.7910/DVN/WNDOFL](https://doi.org/10.7910/DVN/WNDOFL), _Harvard Dataverse_, V1, UNF:6:k9Hv0lysKrB+tQLkdOEZOw== [fileUNF] "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Set this flag to True if you want to download the dataset\n",
+    "download_files = True\n",
+    "\n",
+    "if download_files: \n",
+    "    import os\n",
+    "    from pyDataverse.api import NativeApi, DataAccessApi\n",
+    "    from pyDataverse.models import Dataverse\n",
+    "    \n",
+    "    if not os.path.exists('data/'):\n",
+    "        os.makedirs('data/')\n",
+    "\n",
+    "    base_url = 'https://dataverse.harvard.edu/'\n",
+    "\n",
+    "    api = NativeApi(base_url)\n",
+    "    data_api = DataAccessApi(base_url)\n",
+    "\n",
+    "\n",
+    "    DOI = \"doi:10.7910/DVN/WNDOFL\"\n",
+    "    dataset = api.get_dataset(DOI)\n",
+    "    \n",
+    "    files_list = dataset.json()['data']['latestVersion']['files']\n",
+    "\n",
+    "    for file in files_list:\n",
+    "        filename = file[\"dataFile\"][\"filename\"]\n",
+    "        file_id = file[\"dataFile\"][\"id\"]\n",
+    "        print(\"File name {}, id {}\".format(filename, file_id))\n",
+    "\n",
+    "        response = data_api.get_datafile(file_id)\n",
+    "        with open('data/' + filename, \"wb\") as f:\n",
+    "            f.write(response.content)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# helper functions used later to match URLs and identifiers\n",
+    "def clean_url(url):\n",
+    "    return url.split('?')[0].strip('/').lower()\n",
+    "\n",
+    "def clean_doi(doi):\n",
+    "    try: \n",
+    "        if type(doi) == int:\n",
+    "            doi = str(doi)\n",
+    "        return doi.strip('/. ').lower()\n",
+    "    except:\n",
+    "        return np.nan\n",
+    "    \n",
+    "def make_bool(x):\n",
+    "    if x == 0 or x == 'No':\n",
+    "        return False\n",
+    "    if x == 1 or x == 'Yes':\n",
+    "        return True\n",
+    "    return None"
+   ]
+  },
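+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "A quick illustration (added for this write-up, not part of the original analysis) of what the helpers above do. The URL, DOI, and codes below are made-up examples rather than records from the dataset."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Illustrative values only (hypothetical, not from the Altmetric or coded data)\n",
+    "print(clean_url('https://www.example.com/Some-Story/?utm_source=rss'))  # https://www.example.com/some-story\n",
+    "print(clean_doi('10.1234/ABC.567/'))                                    # 10.1234/abc.567\n",
+    "print(clean_doi(np.nan))                                                # nan (non-strings fall through)\n",
+    "print(make_bool('Yes'), make_bool(0), make_bool('maybe'))               # True False None"
+   ]
+  },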
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "id_cols = ['DOI', 'ISBN', 'clinical_trial_id', 'URI', 'pubmed_id', 'pmc_id', 'handle', 'ads_bibcode', 'arxiv_id', 'repec_id', 'SSRN', 'URN']\n",
+    "dtypes = {c: str for c in id_cols}\n",
+    "\n",
+    "# Data downloaded from Altmetric Explorer on Sept 9, 2021\n",
+    "# Filtered to all research mentions in the following outlets since March 1, 2021:\n",
+    "# The Guardian, HealthDay, IFLScience, MedPage Today, News Medical, New York Times, Popular Science, and Wired\n",
+    "alt = pd.read_table('data/altmetric_dataset.tab', sep=\"\\t\", dtype=dtypes, encoding='utf8')\n",
+    "alt['URL'] = alt.URL.map(clean_url)\n",
+    "alt['DOI'] = alt.DOI.map(clean_doi)\n",
+    "alt.loc[:,'alt_id'] = alt.index\n",
+    "\n",
+    "alt['outlet'] = alt.outlet.map(lambda x: x.strip())\n",
+    "\n",
+    "alt.loc[:,id_cols] = alt.loc[:,id_cols].applymap(lambda x: x.lower() if x == x else x)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# We found some errors in the identifiers of the Altmetric data that had obvious corrections, which we applied manually\n",
+    "url = 'https://www.iflscience.com/space/european-satellite-finds-12-very-rare-einstein-crosses'\n",
+    "i = '2012'\n",
+    "alt.loc[(alt.URL == url) & (alt.arxiv_id == i), 'arxiv_id'] = '2012.10051'\n",
+    "\n",
+    "url = 'https://www.wired.com/story/mathematicians-settle-the-erdos-coloring-conjecture'\n",
+    "i = '2101'\n",
+    "alt.loc[(alt.URL == url) & (alt.arxiv_id == i), 'arxiv_id'] = '2101.04698'\n",
+    "\n",
+    "url = 'https://www.news-medical.net/news/20210421/research-offers-new-insights-on-the-significance-of-hyperinflammation-following-sars-cov-2-infection.aspx'\n",
+    "i = '10.1002/(issn)1529-0131'\n",
+    "alt.loc[(alt.URL == url) & (alt.DOI == i), 'DOI'] = '10.1002/art.41763'"
+   ]
+  },
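+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The two arXiv corrections above fix identifiers that were truncated to a bare year-month prefix (e.g. '2012'). The optional check below is one way to surface similar candidates for manual review; it assumes pandas >= 1.1 for `str.fullmatch` and was not part of the original cleaning."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# List Altmetric rows whose arXiv ID is only a 4-digit prefix, which usually\n",
+    "# indicates a truncated identifier worth reviewing by hand (illustrative check only).\n",
+    "suspect_arxiv = alt[alt.arxiv_id.str.fullmatch(r'\\d{4}', na=False)]\n",
+    "suspect_arxiv[['outlet', 'URL', 'arxiv_id']]"
+   ]
+  },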
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Some code we used to clean up and standardize the data we coded**\n",
+    "\n",
+    "**The final output file is included in the published dataset**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# gold = pd.read_excel('dataset/gold.xlsx', engine='openpyxl')\n",
+    "\n",
+    "# gold['URL'] = gold.URL.map(clean_url)\n",
+    "# gold['DOI'] = gold.DOI.map(clean_doi)\n",
+    "\n",
+    "# gold.loc[:,'outlet'] = gold.outlet.str.strip()\n",
+    "# gold.loc[:,code_cols] = gold.loc[:,code_cols].applymap(make_bool)\n",
+    "# gold.rename(columns={'DOI': 'identifier'}, inplace=True)\n",
+    "\n",
+    "# gold.loc[:,'identifier'] = gold.identifier\n",
+    "\n",
+    "# s = 'arxiv:'\n",
+    "# gold.loc[gold.identifier.str.startswith(s, na=False),'identifier'] = gold.loc[gold.identifier.str.startswith(s, na=False),'identifier'].map(lambda x: x[len(s):])\n",
+    "# s = 'pmid: '\n",
+    "# gold.loc[gold.identifier.str.startswith(s, na=False),'identifier'] = gold.loc[gold.identifier.str.startswith(s, na=False),'identifier'].map(lambda x: x[len(s):])\n",
+    "\n",
+    "# gold['ResearchMentioned'] = gold.identifier.notnull()\n",
+    "# gold['gold_id'] = gold.index\n",
+    "\n",
+    "# gold.loc[:,code_cols] = gold.loc[:,code_cols].applymap(lambda x: int(x) if type(x) == bool else None)\n",
+    "\n",
+    "# # lowercase pmc and clinical trial\n",
+    "# gold.to_csv('content_analysis_dataset.csv', index=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Read in coded data\n",
+    "code_cols = ['Aggregated', 'PressRelease', 'ResearchMentioned', 'DescribesAsresearch', 'HasLink', 'JournalMentioned', 'AuthorMentioned', 'InstitutionMentioned', 'StudyDateMentioned']\n",
+    "gold = pd.read_table('data/content_analysis_dataset.tab')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Table 1. Number of stories and mentions across news outlets"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tmp = gold[gold.ResearchMentioned == 1].groupby(['outlet'])['URL'].agg(['nunique', 'size'])\n",
+    "tmp['n'] = gold.groupby(['outlet'])['URL'].nunique().astype(int)\n",
+    "tmp['pct'] = tmp['nunique'].divide(tmp['n']).multiply(100).round(0)\n",
+    "tmp['avg'] = tmp['size'].divide(tmp['nunique']).round(1)\n",
+    "tmp = tmp[['n', 'nunique', 'pct', 'size', 'avg']]\n",
+    "tmp.loc['Total'] = tmp.agg({'n': 'sum',\n",
+    "                            'nunique': 'sum',\n",
+    "                            'pct': 'mean',\n",
+    "                            'size': 'sum',\n",
+    "                            'avg': 'mean'})\n",
+    "tmp.loc[:,['n', 'nunique', 'pct', 'size']] = tmp.loc[:,['n', 'nunique', 'pct', 'size']].astype(int)\n",
+    "tmp['n'] = tmp['n'].astype(int)\n",
+    "tmp['nunique'] = tmp['nunique'].astype(int)\n",
+    "tmp['size'] = tmp['size'].astype(int)\n",
+    "tmp['avg'] = tmp['avg'].round(1)\n",
+    "\n",
+    "tmp.columns = ['Num Stories', 'Num Stories w/ Mentions', 'Percent Stories w/ Mentions', 'Num Mentions', 'Average Mentions / Story']\n",
+    "\n",
+    "tmp.to_clipboard()\n",
+    "tmp"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Table 2. How research was mentioned across news outlets"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "mention_cols = ['DescribesAsresearch', 'HasLink', 'JournalMentioned', 'AuthorMentioned', 'InstitutionMentioned', 'StudyDateMentioned']\n",
+    "def pct(x):\n",
+    "    return \"{:.0f}\".format(100*x.sum()/len(x))\n",
+    "\n",
+    "agg = {x: ['sum', pct] for x in mention_cols}\n",
+    "agg['URL'] = 'count'\n",
+    "\n",
+    "df = gold[gold.ResearchMentioned == True].groupby('outlet').agg(agg)\n",
+    "total = gold[gold.ResearchMentioned == True].groupby('ResearchMentioned').agg(agg)\n",
+    "total.index = ['Total']\n",
+    "df = df.append(total)\n",
+    "df = df.astype(int)\n",
+    "# columns = []\n",
+    "# for i, c in enumerate(df.columns): \n",
+    "#     x = c[0]\n",
+    "#     if i % 2 == 1: \n",
+    "#         x = ''\n",
+    "#     if c[1] == 'sum':\n",
+    "#         y = 'Number'\n",
+    "#     elif c[1] == 'pct':\n",
+    "#         y = '%'\n",
+    "#     elif c[1] == 'nunique':\n",
+    "#         y = 'N'\n",
+    "#     columns.append('\\n'.join([x,y]))\n",
+    "# df.columns = columns\n",
+    "df = df.rename(columns={'sum': 'Number', 'pct': '%', 'count': 'Num Mentions'}, level=1)\n",
+    "    \n",
+    "df.to_clipboard()\n",
+    "df\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# id_cols = ['DOI', 'ISBN', 'clinical_trial_id', 'URI', 'pubmed_id', 'pmc_id', 'handle', 'ads_bibcode', 'arxiv_id', 'repec_id', 'SSRN', 'URN']\n",
+    "# alt.loc[:,id_cols] = alt.loc[:,id_cols].applymap(lambda x: x.lower() if type(x) == str else x)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "gold_wm = gold[gold.identifier.notnull()]\n",
+    "N = len(gold_wm)\n",
+    "print(\"Gold dataset has {} mentions\".format(N))\n",
+    "\n",
+    "altmetric_urls = set(alt.URL)\n",
+    "could_match = gold_wm[gold_wm.URL.isin(altmetric_urls)]\n",
+    "n = could_match.shape[0]\n",
+    "print(\"Of those, {} ({:.0f}%) could have ID match (URLs in altmetric)\".format(n, n*100/N))\n",
+    "\n",
+    "for i, identifier in enumerate(id_cols):\n",
+    "    df2 = alt[['URL', 'alt_id', identifier]].copy()\n",
+    "    df2.columns = ['URL', 'alt_id', 'matched_alt_id']\n",
+    "    df2.loc[:,'matched_id_type'] = identifier\n",
+    "    merged = gold_wm.merge(df2, left_on=['URL', 'identifier'], right_on=['URL', 'matched_alt_id'])\n",
+    "    if i == 0: \n",
+    "        matched = merged\n",
+    "    else:\n",
+    "        matched = matched.append(merged)\n",
+    "\n",
+    "print(\"Removing {} duplicate matches.\".format(matched.duplicated(subset=['gold_id']).sum()))\n",
+    "\n",
+    "matched.drop_duplicates(subset=['gold_id'], inplace=True) # shouldn't happen, but altmetric has duplicates sometimes\n",
+    "\n",
+    "print(\"Of the {}, {} ({:.0f}%) have a match\".format(N, matched.shape[0], matched.shape[0]*100/N))\n",
+    "\n",
+    "our_urls_mentions = set(gold[gold.ResearchMentioned == True].URL)\n",
+    "no_chance_match = gold_wm[gold_wm.URL.isin(our_urls_mentions.difference(altmetric_urls))]\n",
+    "n = no_chance_match.shape[0]\n",
+    "print(\"Of the {}, {} ({:.0f}%) have no chance of matching (URLs not in altmetric)\".format(N, n, n*100/N))"
+   ]
+  },
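+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "A readability note, not used in the analysis: the per-identifier loop above can also be written as a single long-format merge. The sketch below assumes the same `alt`, `gold_wm`, and `id_cols` objects and should recover the same set of matched `gold_id` values."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Alternative sketch: reshape the identifier columns to long format and merge once.\n",
+    "# Added for illustration only; the analysis above uses the per-identifier loop.\n",
+    "alt_long = alt.melt(id_vars=['URL', 'alt_id'], value_vars=id_cols,\n",
+    "                    var_name='matched_id_type', value_name='matched_alt_id').dropna(subset=['matched_alt_id'])\n",
+    "matched_alt = gold_wm.merge(alt_long, left_on=['URL', 'identifier'], right_on=['URL', 'matched_alt_id'])\n",
+    "matched_alt = matched_alt.drop_duplicates(subset=['gold_id'])\n",
+    "print(len(matched_alt))"
+   ]
+  },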
{} mentions\".format(N))\n", + "\n", + "altmetric_urls = set(alt.URL)\n", + "could_match = gold_wm[gold_wm.URL.isin(altmetric_urls)]\n", + "n = could_match.shape[0]\n", + "print(\"Of those, {} ({:.0f}%) could have ID match (URLs in altmetric)\".format(n, n*100/N))\n", + "\n", + "for i, identifier in enumerate(id_cols):\n", + " df2 = alt[['URL', 'alt_id', identifier]].copy()\n", + " df2.columns = ['URL', 'alt_id', 'matched_alt_id']\n", + " df2.loc[:,'matched_id_type'] = identifier\n", + " merged = gold_wm.merge(df2, left_on=['URL', 'identifier'], right_on=['URL', 'matched_alt_id'])\n", + " if i == 0: \n", + " matched = merged\n", + " else:\n", + " matched = matched.append(merged)\n", + "\n", + "print(\"Removing {} duplicate matches.\".format(matched.duplicated(subset=['gold_id']).sum()))\n", + "\n", + "matched.drop_duplicates(subset=['gold_id'], inplace=True) # shouldn't happen, but altmetric has duplicates sometimes\n", + "\n", + "print(\"Of the {}, {} ({:.0f}%) have a match\".format(N, matched.shape[0], matched.shape[0]*100/N))\n", + "\n", + "our_urls_mentions = set(gold[gold.ResearchMentioned == True].URL)\n", + "no_chance_match = gold_wm[gold_wm.URL.isin(our_urls_mentions.difference(altmetric_urls))]\n", + "n = no_chance_match.shape[0]\n", + "print(\"Of the {}, {} ({:.0f}%) have no chance of matching (URLs not in altmetric)\".format(N, n, n*100/N))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "alt_errors = alt[(alt.URL.isin(gold[gold.ResearchMentioned == True].URL)) & (~alt.alt_id.isin(matched.alt_id))]\n", + "\n", + "# errs = ['https://www.iflscience.com/health-and-medicine/cannabis-use-early-in-life-linked-to-some-changes-in-heart-and-artery-function', \n", + "# 'https://www.nytimes.com/2021/03/14/health/covid-schools-social-distancing-3-feet.html',\n", + "# 'https://www.nytimes.com/2021/04/01/health/pandemics-plague-history-resilience.html',\n", + "# 'https://www.nytimes.com/2021/04/07/science/particle-physics-muon-fermilab-brookhaven.html']\n", + "\n", + "# news = ['https://consumer.healthday.com/b-3-31-too-few-minorities-in-u-s-health-care-workforce-report-2651245191.html', \n", + "# 'https://www.iflscience.com/environment/bitcoin-mining-will-soon-pump-out-more-carbon-than-czech-republic-new-study-says', \n", + "# 'https://www.iflscience.com/health-and-medicine/male-fertility-how-everyday-chemicals-are-destroying-sperm-counts-in-humans-and-animals', \n", + "# 'https://www.iflscience.com/plants-and-animals/-a-surprising-number-of-sea-monster-sightings-can-be-explained-by-whale-erections', \n", + "# 'https://www.popsci.com/story/health/astrazeneca-vaccine-blood-clots', \n", + "# 'https://www.popsci.com/story/health/how-vaccine-passport-works', \n", + "# 'https://www.popsci.com/story/health/lyme-disease-treatment-for-humans', \n", + "# 'https://www.theguardian.com/society/2021/apr/02/covid-further-rare-blood-clot-cases-found-in-oxford-astrazeneca-recipients', \n", + "# 'https://www.theguardian.com/society/2021/apr/11/is-vaccinating-against-covid-enough-what-we-can-learn-from-other-countries', \n", + "# 'https://www.wired.com/story/blue-carbon-credits-could-help-restore-ecosystems', \n", + "# 'https://www.wired.com/story/how-cargo-ships-could-help-detect-tsunamis', \n", + "# 'https://www.wired.com/story/how-to-kill-a-zombie-fire']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def _f_score_helper(outlet=False):\n", + " if outlet: \n", + " g = 
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Optional code to create an Excel sheet with the errors found"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# writer = pd.ExcelWriter('errors.xlsx', engine='openpyxl')\n",
+    "# alt_errors.to_excel(writer, 'False Positives')\n",
+    "# gold[(gold.identifier.notnull()) & ~(gold.gold_id.isin(matched.gold_id))].to_excel(writer, 'False Negatives')\n",
+    "# writer.save()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Table 3. Precision, Recall, and Accuracy (F-score) by news outlet"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = pd.DataFrame(gold.groupby('outlet').size())\n",
+    "df.columns = ['N (mentions)']\n",
+    "df['scores'] = df.index.map(lambda x: f_score_mentions(x, False))\n",
+    "df['Precision'] = df.scores.map(lambda x: \"{:.2f}\".format(x[0]))\n",
+    "df['Recall'] = df.scores.map(lambda x: \"{:.2f}\".format(x[1]))\n",
+    "df['F-Score'] = df.scores.map(lambda x: \"{:.2f}\".format(x[2]))\n",
+    "del df['scores']\n",
+    "# df['scores'] = df['F-score'].map(lambda x: \"{:.2f}\".format(x))\n",
+    "df.to_clipboard()\n",
+    "df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = gold[gold.gold_id.isin(matched.gold_id)]\n",
+    "df = df.append(gold[gold.gold_id.isin(no_chance_match.gold_id)])\n",
+    "df['match'] = df.gold_id.isin(matched.gold_id)\n",
+    "df['match'] = df.match.astype(int)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Logit Model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import statsmodels.api as sm\n",
+    "import statsmodels.formula.api as smf"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model = smf.logit(formula=\"match ~ DescribesAsresearch + HasLink + JournalMentioned + AuthorMentioned + InstitutionMentioned + StudyDateMentioned\", data=df)\n",
+    "res = model.fit()\n",
+    "\n",
+    "res.params\n",
+    "odds = np.exp(res.params['HasLink'])\n",
+    "prob = '{:.1f}'.format(odds / (1 + odds))\n",
+    "\n",
+    "print(res.summary())"
+   ]
+  },
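+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "An optional sketch (added for illustration, not part of the original analysis) that converts the fitted model into predicted probabilities of an Altmetric match for two hypothetical coding profiles; the 0/1 values below are illustrative, not observed rows."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Illustrative only: predicted probability of a match for two hypothetical mention profiles\n",
+    "profiles = pd.DataFrame({\n",
+    "    'DescribesAsresearch': [1, 1],\n",
+    "    'HasLink': [1, 0],\n",
+    "    'JournalMentioned': [1, 0],\n",
+    "    'AuthorMentioned': [1, 0],\n",
+    "    'InstitutionMentioned': [1, 0],\n",
+    "    'StudyDateMentioned': [1, 0],\n",
+    "})\n",
+    "print(res.predict(profiles))  # probability that Altmetric captures each hypothetical mention"
+   ]
+  },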
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "params = res.params\n",
+    "conf = res.conf_int()\n",
+    "conf['Odds Ratio'] = params\n",
+    "conf.columns = ['2.5%', '97.5%', 'Odds Ratio']\n",
+    "print(np.exp(conf))\n",
+    "\n",
+    "\n",
+    "np.exp(conf).to_clipboard()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}