From c0f852d7048819b9649beb79a4013fc95a76474f Mon Sep 17 00:00:00 2001 From: ninanorgren Date: Fri, 6 Oct 2023 16:08:44 +0200 Subject: [PATCH] updated lectures day 2 --- lectures/Day_2.ipynb | 259 +++++++++++++--------------- lectures/Day_2.slides.embedded.html | 242 +++++++++++++------------- lectures/Day_2.slides.html | 243 +++++++++++++------------- lectures/Day_2.slides.pdf | Bin 3046846 -> 3023300 bytes 4 files changed, 366 insertions(+), 378 deletions(-) diff --git a/lectures/Day_2.ipynb b/lectures/Day_2.ipynb index 81d6718..2a668bd 100644 --- a/lectures/Day_2.ipynb +++ b/lectures/Day_2.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 6, "metadata": { "slideshow": { "slide_type": "skip" @@ -88,7 +88,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 7, "metadata": { "slideshow": { "slide_type": "-" @@ -101,7 +101,7 @@ "bool" ] }, - "execution_count": 2, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -125,7 +125,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 8, "metadata": { "slideshow": { "slide_type": "-" @@ -138,7 +138,7 @@ "3.14" ] }, - "execution_count": 3, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -163,7 +163,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 9, "metadata": { "slideshow": { "slide_type": "-" @@ -176,7 +176,7 @@ "list" ] }, - "execution_count": 4, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -251,7 +251,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 10, "metadata": { "slideshow": { "slide_type": "-" @@ -261,10 +261,10 @@ { "data": { "text/plain": [ - "[5, 6, 7, 8, 5, 6, 7, 8, 5, 6, 7, 8]" + "14" ] }, - "execution_count": 5, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -276,7 +276,7 @@ "d = [5,6,7,8]\n", "e = 7\n", "\n", - "d * 3" + "e * a" ] }, { @@ -294,7 +294,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 11, "metadata": { "slideshow": { "slide_type": "-" @@ -307,7 +307,7 @@ "False" ] }, - "execution_count": 6, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -317,7 +317,7 @@ "b = 5\n", "c = 10\n", "b in a\n", - "b < c or c == 1\n", + "b < c and c == 1\n", "b not in a " ] }, @@ -364,7 +364,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 12, "metadata": { "slideshow": { "slide_type": "-" @@ -374,10 +374,10 @@ { "data": { "text/plain": [ - "[1, 2]" + "' ra'" ] }, - "execution_count": 7, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -388,7 +388,7 @@ "c = 'a random string'\n", "\n", "c[2]\n", - "a[:2]" + "c[1:4]" ] }, { @@ -408,7 +408,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 13, "metadata": { "slideshow": { "slide_type": "-" @@ -416,14 +416,15 @@ }, "outputs": [ { - "data": { - "text/plain": [ - "[42, 2, 3, 4, 5]" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" + "ename": "TypeError", + "evalue": "'str' object does not support item assignment", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "Input \u001b[0;32mIn [13]\u001b[0m, in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 2\u001b[0m b \u001b[38;5;241m=\u001b[39m [\u001b[38;5;124m'\u001b[39m\u001b[38;5;124ma\u001b[39m\u001b[38;5;124m'\u001b[39m,\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m'\u001b[39m,\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mc\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;66;03m# mutable\u001b[39;00m\n\u001b[1;32m 3\u001b[0m c \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124ma random string\u001b[39m\u001b[38;5;124m'\u001b[39m \u001b[38;5;66;03m# immutable\u001b[39;00m\n\u001b[0;32m----> 5\u001b[0m c[\u001b[38;5;241m0\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mA\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 6\u001b[0m a[\u001b[38;5;241m0\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m42\u001b[39m\n\u001b[1;32m 7\u001b[0m a\n", + "\u001b[0;31mTypeError\u001b[0m: 'str' object does not support item assignment" + ] } ], "source": [ @@ -431,7 +432,7 @@ "b = ['a','b','c'] # mutable\n", "c = 'a random string' # immutable\n", "\n", - "#c[0] = 'A'\n", + "c[0] = 'A'\n", "a[0] = 42\n", "a" ] @@ -453,7 +454,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 14, "metadata": { "slideshow": { "slide_type": "-" @@ -464,7 +465,6 @@ "name": "stdout", "output_type": "stream", "text": [ - "(1, 2, 3, 4, 'a', 'b', 'c', [42, 43, 44])\n", "1\n", "2\n", "3\n", @@ -479,7 +479,7 @@ "source": [ "myTuple = (1,2,3,4,'a','b','c',[42,43,44])\n", "#myTuple[0] = 42\n", - "print(myTuple)\n", + "#print(myTuple)\n", "#print(len(myTuple))\n", "for i in myTuple:\n", " print(i)" @@ -502,7 +502,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 15, "metadata": { "slideshow": { "slide_type": "fragment" @@ -513,12 +513,12 @@ "name": "stdout", "output_type": "stream", "text": [ - "7 is not in the list\n" + "3 is found in the list b\n" ] } ], "source": [ - "a = 7\n", + "a = 3\n", "b = [1,2,3,4]\n", "if a in b:\n", " print(str(a)+' is found in the list b')\n", @@ -551,7 +551,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 16, "metadata": { "slideshow": { "slide_type": "slide" @@ -578,7 +578,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 17, "metadata": { "slideshow": { "slide_type": "-" @@ -714,7 +714,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 18, "metadata": { "scrolled": true, "slideshow": { @@ -764,7 +764,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 19, "metadata": { "slideshow": { "slide_type": "-" @@ -775,7 +775,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "5\n" + "['5', '12041', '.', 'A', 'T', '18075.2', 'PASS', 'AN=26;AC=2', 'GT:AD:DP:GQ:PL', './.:0,0:0:.:.', './.:0,0:0:.:.', './.:0,0:0:.:.', './.:0,0:0:.:.', './.:0,0:0:.:.', './.:0,0:0:.:.', './.:0,0:0:.:.', './.:0,0:0:.:.', './.:0,0:0:.:.', './.:0,0:0:.:.', '0/1:15,6:21:99:142,0,391', './.:0,0:0:.:.', '0/1:16,17:33:99:442,0,422']\n" ] } ], @@ -785,7 +785,7 @@ " if not line.startswith('#'):\n", " cols = line.strip().split('\\t')\n", " if cols[0] == '5':\n", - " print(cols[0])\n", + " print(cols)\n", " break\n", "fh.close()\n", "\n", @@ -805,7 +805,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 20, "metadata": { "slideshow": { "slide_type": "-" @@ -857,7 +857,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 21, "metadata": { "slideshow": { "slide_type": "-" @@ -910,7 +910,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 22, "metadata": { "slideshow": { "slide_type": "-" @@ -921,7 +921,6 @@ "name": "stdout", "output_type": "stream", "text": [ - "['0/1', '15,18', '33', '99', '489,0,357']\n", "0/1\n" ] } @@ -934,7 +933,6 @@ " if cols[0] == '5' and \\\n", " int(cols[1]) >= 1000000 and int(cols[1]) <= 1005000:\n", " geno = cols[9].split(':')[0]\n", - " print(cols[9].split(':'))\n", " print(geno)\n", " break\n", "fh.close()\n", @@ -965,7 +963,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 23, "metadata": { "slideshow": { "slide_type": "-" @@ -1039,7 +1037,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 24, "metadata": { "slideshow": { "slide_type": "-" @@ -1050,33 +1048,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "5:1000080_A-T has genotype: 0/1\n", - "5:1000156_G-A has genotype: 0/1\n", - "5:1001097_C-A has genotype: 0/1\n", - "5:1001193_C-T has genotype: 0/1\n", - "5:1001245_T-C has genotype: 0/1\n", - "5:1001339_C-T has genotype: 0/1\n", - "5:1001344_G-C has genotype: 0/1\n", - "5:1001683_G-T has genotype: 0/1\n", - "5:1001755_G-A has genotype: 0/1\n", - "5:1002374_G-A has genotype: 0/1\n", - "5:1002382_G-C has genotype: 0/1\n", - "5:1002620_T-C has genotype: 0/1\n", - "5:1002722_G-A has genotype: 0/1\n", - "5:1002819_C-A has genotype: 0/1\n", - "5:1003043_G-T has genotype: 0/1\n", - "5:1003099_C-T has genotype: 0/1\n", - "5:1003135_G-A has genotype: 0/1\n", - "5:1004648_A-G has genotype: 0/1\n", - "5:1004650_A-C has genotype: 0/1\n", - "5:1004665_A-G has genotype: 0/1\n", - "5:1004702_G-T has genotype: 0/1\n", - "5:1004879_T-C has genotype: 0/1\n" + "['5:1000080_A-T', '5:1000156_G-A', '5:1001097_C-A', '5:1001193_C-T', '5:1001245_T-C', '5:1001339_C-T', '5:1001344_G-C', '5:1001683_G-T', '5:1001755_G-A', '5:1002374_G-A', '5:1002382_G-C', '5:1002620_T-C', '5:1002722_G-A', '5:1002819_C-A', '5:1003043_G-T', '5:1003099_C-T', '5:1003135_G-A', '5:1004648_A-G', '5:1004650_A-C', '5:1004665_A-G', '5:1004702_G-T', '5:1004879_T-C']\n" ] } ], "source": [ "fh = open('/mnt/c/Users/Nina/Documents/courses/Python_Beginner_Course/genotypes.vcf', 'r', encoding = 'utf-8')\n", + "res = []\n", "for line in fh:\n", " if not line.startswith('#'):\n", " cols = line.strip().split('\\t')\n", @@ -1085,8 +1063,10 @@ " geno = cols[9].split(':')[0]\n", " if geno in ['0/1', '1/1']:\n", " var = cols[0]+':'+cols[1]+'_'+cols[3]+'-'+cols[4]\n", - " print(var+' has genotype: '+geno)\n", - "fh.close()" + " # print(var+' has genotype: '+geno)\n", + " res.append(var)\n", + "fh.close()\n", + "print(res)" ] }, { @@ -1149,7 +1129,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 25, "metadata": { "slideshow": { "slide_type": "slide" @@ -1157,14 +1137,15 @@ }, "outputs": [ { - "data": { - "text/plain": [ - "'a string'" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" + "ename": "AttributeError", + "evalue": "'list' object has no attribute 'strip'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "Input \u001b[0;32mIn [25]\u001b[0m, in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124ma string\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 4\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124ma string \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;241m.\u001b[39mstrip()\n\u001b[0;32m----> 5\u001b[0m \u001b[43m[\u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\u001b[38;5;241;43m2\u001b[39;49m\u001b[43m,\u001b[49m\u001b[38;5;241;43m3\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstrip\u001b[49m()\n", + "\u001b[0;31mAttributeError\u001b[0m: 'list' object has no attribute 'strip'" + ] } ], "source": [ @@ -1172,7 +1153,7 @@ "len('a string')\n", "\n", "'a string '.strip()\n", - "#[1,2,3].strip() " + "[1,2,3].strip() " ] }, { @@ -1221,7 +1202,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 26, "metadata": { "slideshow": { "slide_type": "-" @@ -1229,22 +1210,18 @@ }, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "0\n", - "1\n", - "2\n", - "3\n", - "4\n" - ] + "data": { + "text/plain": [ + "range(0, 5)" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "range(5)\n", - "\n", - "for i in range(5):\n", - " print(i)" + "range(5)" ] }, { @@ -1260,7 +1237,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 27, "metadata": { "slideshow": { "slide_type": "-" @@ -1273,7 +1250,7 @@ "[1, 2, 4, 23, 35, 88]" ] }, - "execution_count": 22, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } @@ -1299,7 +1276,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 28, "metadata": { "slideshow": { "slide_type": "-" @@ -1307,19 +1284,24 @@ }, "outputs": [ { - "data": { - "text/plain": [ - "14" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "Help on built-in function sum in module builtins:\n", + "\n", + "sum(iterable, /, start=0)\n", + " Return the sum of a 'start' value (default: 0) plus an iterable of numbers\n", + " \n", + " When the iterable is empty, return the start value.\n", + " This function is intended specifically for use with numeric values and may\n", + " reject non-numeric types.\n", + "\n" + ] } ], "source": [ - "sum([1,2,3,4],4)\n", - "#help(sum)" + "sum([1,2,3,4],10)\n", + "help(sum)" ] }, { @@ -1335,7 +1317,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 29, "metadata": { "slideshow": { "slide_type": "-" @@ -1345,17 +1327,18 @@ { "data": { "text/plain": [ - "int" + "['m', 'y', ' ', 's', 't', 'r', 'i', 'n', 'g']" ] }, - "execution_count": 24, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "round(3.234556, 3)\n", - "type(4)" + "a = 'my string'\n", + "list(a)" ] }, { @@ -1399,7 +1382,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 30, "metadata": { "slideshow": { "slide_type": "-" @@ -1409,16 +1392,16 @@ { "data": { "text/plain": [ - "' spaciousWith5678.'" + "'spaciou sWith5678.com'" ] }, - "execution_count": 25, + "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "' spaciousWith5678.com'.strip('mco')" + "' spaciou sWith5678.com\\n'.strip()" ] }, { @@ -1434,7 +1417,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 31, "metadata": { "slideshow": { "slide_type": "-" @@ -1447,7 +1430,7 @@ "['split', 'a', 'string', 'into a list ']" ] }, - "execution_count": 26, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } @@ -1470,7 +1453,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 32, "metadata": { "slideshow": { "slide_type": "-" @@ -1478,19 +1461,21 @@ }, "outputs": [ { - "data": { - "text/plain": [ - "'a| |s|t|r|i|n|g| |a|l|r|e|a|d|y'" - ] - }, - "execution_count": 27, - "metadata": {}, - "output_type": "execute_result" + "ename": "TypeError", + "evalue": "sequence item 0: expected str instance, int found", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "Input \u001b[0;32mIn [32]\u001b[0m, in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m|\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;241m.\u001b[39mjoin(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124ma string already\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 2\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;241m.\u001b[39mjoin([\u001b[38;5;124m'\u001b[39m\u001b[38;5;124ma\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mc\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124md\u001b[39m\u001b[38;5;124m'\u001b[39m])\n\u001b[0;32m----> 3\u001b[0m \u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m \u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m(\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\u001b[38;5;241;43m2\u001b[39;49m\u001b[43m,\u001b[49m\u001b[38;5;241;43m3\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[0;31mTypeError\u001b[0m: sequence item 0: expected str instance, int found" + ] } ], "source": [ "'|'.join('a string already')\n", - "#'|'.join(['a', 'b', 'c', 'd'])" + "''.join(['a', 'b', 'c', 'd'])\n", + "' '.join([1,2,3])" ] }, { @@ -1508,7 +1493,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 33, "metadata": { "slideshow": { "slide_type": "-" @@ -1518,17 +1503,17 @@ { "data": { "text/plain": [ - "False" + "True" ] }, - "execution_count": 28, + "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "'long string'.startswith('ng',2)\n", - "'long string'.endswith('nt')" + "#'long string'.endswith('nt')" ] }, { @@ -1546,7 +1531,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 34, "metadata": { "slideshow": { "slide_type": "-" @@ -1559,7 +1544,7 @@ "'LONGRANDOMSTRING'" ] }, - "execution_count": 29, + "execution_count": 34, "metadata": {}, "output_type": "execute_result" } @@ -1586,7 +1571,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 35, "metadata": { "slideshow": { "slide_type": "-" @@ -1599,7 +1584,7 @@ "[1, 2, 3, 4, 5]" ] }, - "execution_count": 30, + "execution_count": 35, "metadata": {}, "output_type": "execute_result" } @@ -1732,7 +1717,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 36, "metadata": { "slideshow": { "slide_type": "-" diff --git a/lectures/Day_2.slides.embedded.html b/lectures/Day_2.slides.embedded.html index d8540ff..1d4522c 100644 --- a/lectures/Day_2.slides.embedded.html +++ b/lectures/Day_2.slides.embedded.html @@ -14682,7 +14682,7 @@

Literals