diff --git a/Pandas_Basics.ipynb b/Pandas_Basics.ipynb
new file mode 100644
index 0000000..8896532
--- /dev/null
+++ b/Pandas_Basics.ipynb
@@ -0,0 +1,519 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 51,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 52,
+ "metadata": {
+ "scrolled": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " birthday | \n",
+ " name | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 10-Jan-1980 | \n",
+ " Jessen H | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2/28/85 | \n",
+ " Vic A | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 16.01.1975 00:00:00 | \n",
+ " Linden L | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " birthday name\n",
+ "0 10-Jan-1980 Jessen H \n",
+ "1 2/28/85 Vic A \n",
+ "2 16.01.1975 00:00:00 Linden L "
+ ]
+ },
+ "execution_count": 52,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df = pd.DataFrame({'name':[' Jessen H ', ' Vic A ', ' Linden L '], 'birthday':['10-Jan-1980', '2/28/85', '16.01.1975 00:00:00']})\n",
+ "df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 53,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " Jessen H \n"
+ ]
+ }
+ ],
+ "source": [
+ "print (df.loc[0, 'name'])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 54,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "RangeIndex: 3 entries, 0 to 2\n",
+ "Data columns (total 2 columns):\n",
+ "birthday 3 non-null object\n",
+ "name 3 non-null object\n",
+ "dtypes: object(2)\n",
+ "memory usage: 128.0+ bytes\n"
+ ]
+ }
+ ],
+ "source": [
+ "df.info()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 55,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "def strip_str(s):\n",
+ "    '''Return the result of calling s.strip(), i.e. s without surrounding whitespace for a str.'''\n",
+ "    stripped = s.strip()\n",
+ "    return stripped"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 56,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Strip surrounding whitespace from every value in the name column.\n",
+ "# The vectorized .str accessor replaces a per-row apply with a redundant lambda wrapper.\n",
+ "df['name'] = df['name'].str.strip()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 57,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "#df['name'] = df['name'].astype(str).apply(lambda x:x.strip())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 58,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "def get_first_name(name):\n",
+ "    '''\n",
+ "    Return the first whitespace-separated token of name.\n",
+ "\n",
+ "    An empty or whitespace-only name yields '' instead of raising IndexError.\n",
+ "    '''\n",
+ "    tokens = name.split(None, 1)  # only the first token is needed\n",
+ "    return tokens[0] if tokens else ''"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 59,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "# Build the first_name column by running get_first_name over each value of name\n",
+ "df['first_name'] = df['name'].apply(get_first_name)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 60,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "# Use the final whitespace-separated token as the last name; indexing with [1] picked the\n",
+ "# second token, which is a middle name for multi-part names and fails for single-token names.\n",
+ "df['last_name'] = df['name'].str.split().str[-1]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 61,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " birthday | \n",
+ " name | \n",
+ " first_name | \n",
+ " last_name | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 10-Jan-1980 | \n",
+ " Jessen H | \n",
+ " Jessen | \n",
+ " H | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2/28/85 | \n",
+ " Vic A | \n",
+ " Vic | \n",
+ " A | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 16.01.1975 00:00:00 | \n",
+ " Linden L | \n",
+ " Linden | \n",
+ " L | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " birthday name first_name last_name\n",
+ "0 10-Jan-1980 Jessen H Jessen H\n",
+ "1 2/28/85 Vic A Vic A\n",
+ "2 16.01.1975 00:00:00 Linden L Linden L"
+ ]
+ },
+ "execution_count": 61,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 62,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Jessen H\n"
+ ]
+ }
+ ],
+ "source": [
+ "print (df.loc[0, 'name'])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 63,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "from datetime import datetime\n",
+ "# strptime format codes: https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Datetime format\n",
+ "%b - abbreviated month name, Jan, Dec\n",
+ "\n",
+ "%B - full month name\n",
+ "\n",
+ "%d - day of the month (01 to 31), also works without 0 padding, 1-31\n",
+ "\n",
+ "%D - same as %m/%d/%y (C/POSIX `strftime` shorthand; not accepted by Python's `datetime.strptime`)\n",
+ "\n",
+ "%H - hour, using a 24-hour clock (00 to 23)\n",
+ "\n",
+ "%I - hour, using a 12-hour clock (01 to 12)\n",
+ "\n",
+ "%m - month (01 to 12)\n",
+ "\n",
+ "%M - minute\n",
+ "\n",
+ "%S - second\n",
+ "\n",
+ "%T - time of day, same as %H:%M:%S (C/POSIX `strftime` shorthand; not accepted by Python's `datetime.strptime`)\n",
+ "\n",
+ "%y - year without a century (range 00 to 99)\n",
+ "\n",
+ "%Y - year including the century"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 64,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "datetime.datetime"
+ ]
+ },
+ "execution_count": 64,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from datetime import datetime\n",
+ "#10-Jan-1980\n",
+ "dt = datetime.strptime('10-Jan-1980', '%d-%b-%Y')\n",
+ "type(dt)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 65,
+ "metadata": {},
+ "outputs": [
+ {
+ "ename": "ValueError",
+ "evalue": "time data '2/3/85' does not match format '%d/%m/%Y'",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
+ "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m#2/28/85\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;31m#should be '%d/%m/%y'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mdt\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdatetime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstrptime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'2/3/85'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'%d/%m/%Y'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+ "\u001b[0;32m/opt/conda/lib/python3.6/_strptime.py\u001b[0m in \u001b[0;36m_strptime_datetime\u001b[0;34m(cls, data_string, format)\u001b[0m\n\u001b[1;32m 563\u001b[0m \"\"\"Return a class cls instance based on the input string and the\n\u001b[1;32m 564\u001b[0m format string.\"\"\"\n\u001b[0;32m--> 565\u001b[0;31m \u001b[0mtt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfraction\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_strptime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata_string\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mformat\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 566\u001b[0m \u001b[0mtzname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgmtoff\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtt\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 567\u001b[0m \u001b[0margs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtt\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;36m6\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mfraction\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;32m/opt/conda/lib/python3.6/_strptime.py\u001b[0m in \u001b[0;36m_strptime\u001b[0;34m(data_string, format)\u001b[0m\n\u001b[1;32m 360\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mfound\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 361\u001b[0m raise ValueError(\"time data %r does not match format %r\" %\n\u001b[0;32m--> 362\u001b[0;31m (data_string, format))\n\u001b[0m\u001b[1;32m 363\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata_string\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0mfound\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 364\u001b[0m raise ValueError(\"unconverted data remains: %s\" %\n",
+ "\u001b[0;31mValueError\u001b[0m: time data '2/3/85' does not match format '%d/%m/%Y'"
+ ]
+ }
+ ],
+ "source": [
+ "#2/28/85\n",
+ "#fails because %Y expects a 4-digit year; a month-first date like 2/28/85 needs '%m/%d/%y'\n",
+ "dt = datetime.strptime('2/3/85', '%d/%m/%Y')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 66,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "from datetime import datetime\n",
+ "def convert_date(date_str):\n",
+ "    '''\n",
+ "    Parse a date string written in one of several known layouts.\n",
+ "\n",
+ "    date format: https://www.tutorialspoint.com/python/time_strptime.htm\n",
+ "    Layouts are tried in this order and the first one that matches wins:\n",
+ "        28-Jun-1989          (%d-%b-%Y)\n",
+ "        3/25/18              (%m/%d/%y)\n",
+ "        28-Jun-89            (%d-%b-%y)\n",
+ "        18.07.2012 00:00:00  (%d.%m.%Y %H:%M:%S)\n",
+ "        3/25/2013            (%m/%d/%Y)\n",
+ "        3/4/2014 0:00        (%m/%d/%Y %H:%M)\n",
+ "\n",
+ "    Returns the parsed datetime. When no layout matches (or the value is not a\n",
+ "    string), the value is printed and None is returned.\n",
+ "    '''\n",
+ "    # Order matters: the 2-digit-year layouts are tried before their 4-digit twins,\n",
+ "    # exactly as in the original chain of try blocks.\n",
+ "    known_formats = (\n",
+ "        '%d-%b-%Y',\n",
+ "        '%m/%d/%y',\n",
+ "        '%d-%b-%y',\n",
+ "        '%d.%m.%Y %H:%M:%S',\n",
+ "        '%m/%d/%Y',\n",
+ "        '%m/%d/%Y %H:%M',\n",
+ "    )\n",
+ "    for fmt in known_formats:\n",
+ "        try:\n",
+ "            return datetime.strptime(date_str, fmt)\n",
+ "        except (ValueError, TypeError):\n",
+ "            # ValueError: layout mismatch; TypeError: non-string input such as NaN.\n",
+ "            # Anything else (e.g. KeyboardInterrupt) is no longer swallowed.\n",
+ "            continue\n",
+ "    # Nothing matched: show the offending value so it can be inspected\n",
+ "    print (date_str)\n",
+ "    return None"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 67,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "# Parse every birthday string by running convert_date over the column\n",
+ "df['birthday'] = df['birthday'].apply(convert_date)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 68,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " birthday | \n",
+ " name | \n",
+ " first_name | \n",
+ " last_name | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1980-01-10 | \n",
+ " Jessen H | \n",
+ " Jessen | \n",
+ " H | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1985-02-28 | \n",
+ " Vic A | \n",
+ " Vic | \n",
+ " A | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 1975-01-16 | \n",
+ " Linden L | \n",
+ " Linden | \n",
+ " L | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " birthday name first_name last_name\n",
+ "0 1980-01-10 Jessen H Jessen H\n",
+ "1 1985-02-28 Vic A Vic A\n",
+ "2 1975-01-16 Linden L Linden L"
+ ]
+ },
+ "execution_count": 68,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 69,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "pandas.tslib.Timestamp"
+ ]
+ },
+ "execution_count": 69,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "type(df.loc[0,'birthday'])"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.6.1"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}