Skip to content

Commit

Permalink
colab copy-paste
Browse files Browse the repository at this point in the history
  • Loading branch information
ZEMUSHKA committed Aug 25, 2018
1 parent 2e75054 commit fa20699
Show file tree
Hide file tree
Showing 6 changed files with 158 additions and 20 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -102,3 +102,5 @@ ENV/

# Data for assignments
data/

.idea
26 changes: 26 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Natural Language Processing course resources
https://www.coursera.org/learn/language-processing

## Running on Google Colab
Google has released its own flavour of Jupyter called Colab, which has free GPUs!

Here's how you can use it:
1. Open https://colab.research.google.com
2. Click the **GITHUB** tab, paste https://github.com/hse-aml/natural-language-processing and press Enter
3. Choose the notebook you want to open, e.g. week1/week1-MultilabelClassification.ipynb
4. Click **File -> Save a copy in Drive...** to save your progress in Google Drive
5. Click **Runtime -> Change runtime type** and select **GPU** in the **Hardware accelerator** box
6. Execute the following code in the first cell:
```python
! wget https://raw.githubusercontent.com/hse-aml/natural-language-processing/colab/setup_google_colab.py -O setup_google_colab.py
import setup_google_colab
setup_google_colab.setup_week1() # change to the week you're working on
```
7. If you run many notebooks on Colab, they can continue to eat up memory.
You can kill them with `! pkill -9 python3` and check with `! nvidia-smi` that the GPU memory is freed.

**Known issues:**
* No support for `ipywidgets`, so we cannot use fancy `tqdm` progress bars.
For now, we use a simplified version of a progress bar suitable for Colab.
* Blinking animation with `IPython.display.clear_output()`.
It's usable, but we are still looking for a workaround.
2 changes: 2 additions & 0 deletions common/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
27 changes: 7 additions & 20 deletions common/download_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,37 +2,24 @@
# -*- coding: utf-8 -*-
import os
import shutil

try:
import tqdm
# Address problem in tqdm library. For details see: https://github.com/tqdm/tqdm/issues/481
tqdm.monitor_interval = 0
except ImportError:
tqdm = None

import requests
import tqdm_utils


REPOSITORY_PATH="https://github.com/hse-aml/natural-language-processing"
REPOSITORY_PATH = "https://github.com/hse-aml/natural-language-processing"


def download_file(url, file_path):
r = requests.get(url, stream=True)
total_size = int(r.headers.get('content-length'))
try:
with open(file_path, 'wb', buffering=16*1024*1024) as f:
if tqdm:
bar = tqdm.tqdm_notebook(total=total_size, unit='B', unit_scale=True)
bar.set_description(os.path.split(file_path)[-1])

bar = tqdm_utils.tqdm_notebook_failsafe(total=total_size, unit='B', unit_scale=True)
bar.set_description(os.path.split(file_path)[-1])
for chunk in r.iter_content(32 * 1024):
f.write(chunk)
if tqdm:
bar.update(len(chunk))

if tqdm:
bar.close()
else:
print("File {!r} successfully downloaded".format(file_path))
bar.update(len(chunk))
bar.close()
except Exception:
print("Download failed")
finally:
Expand Down
55 changes: 55 additions & 0 deletions common/tqdm_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import print_function


class SimpleTqdm(object):
    """Minimal text progress bar used as a stand-in for ``tqdm``.

    Supports the small subset of the ``tqdm`` interface used here:
    construction from an iterable or from ``total``, ``set_description`` /
    ``set_description_str``, ``update``, ``close`` and iteration. Progress is
    shown by printing one ``*`` each time ``print_frequency`` more steps have
    completed (``print_frequency`` is ``total // 50``, but at least 1).

    Args:
        iterable: Optional iterable to wrap. It is materialised into a list
            so that its length can serve as ``total``.
        total: Expected number of steps. Required when ``iterable`` is None;
            ignored when ``iterable`` is given.
        **kwargs: Accepted for call compatibility with ``tqdm`` and ignored.
    """

    def __init__(self, iterable=None, total=None, **kwargs):
        self.iterable = list(iterable) if iterable is not None else None
        self.total = len(self.iterable) if self.iterable is not None else total
        assert self.iterable is not None or self.total is not None
        self.current_step = 0
        # Initialised here so that next() works even before iter() is called.
        self.index = 0
        self.print_frequency = max(self.total // 50, 1)
        self.desc = ""

    def set_description_str(self, desc):
        """Store ``desc``; it is printed when the bar is closed."""
        self.desc = desc

    def set_description(self, desc):
        """Store ``desc``; it is printed when the bar is closed."""
        self.desc = desc

    def update(self, steps=1):
        """Advance the bar by ``steps`` and print any newly earned stars.

        ``steps`` defaults to 1 so that a bare ``bar.update()`` works on this
        fallback the same way it does on a real ``tqdm`` bar.
        """
        # Last multiple of print_frequency that has already been rendered.
        last_print_step = (self.current_step // self.print_frequency) * self.print_frequency
        i = 1
        while last_print_step + i * self.print_frequency <= self.current_step + steps:
            print("*", end='')
            i += 1
        self.current_step += steps

    def close(self):
        """Terminate the line of stars and print the description."""
        print("\n" + self.desc)

    def __iter__(self):
        assert self.iterable is not None
        self.index = 0
        return self

    def __next__(self):
        if self.index < self.total:
            element = self.iterable[self.index]
            self.update(1)
            self.index += 1
            return element
        else:
            self.close()
            raise StopIteration

    # Python 2 iterator protocol looks up ``next`` instead of ``__next__``.
    next = __next__


def tqdm_notebook_failsafe(*args, **kwargs):
    """Return a ``tqdm`` notebook progress bar, or ``SimpleTqdm`` as a fallback.

    All positional and keyword arguments are forwarded unchanged to
    ``tqdm.tqdm_notebook``. If importing ``tqdm`` or constructing the bar
    raises, the same arguments are passed to ``SimpleTqdm`` instead.
    """
    try:
        import tqdm
        tqdm.monitor_interval = 0  # workaround for https://github.com/tqdm/tqdm/issues/481
        return tqdm.tqdm_notebook(*args, **kwargs)
    except Exception:
        # tqdm is broken on Google Colab.
        # ``Exception`` rather than a bare ``except`` so that KeyboardInterrupt
        # and SystemExit still propagate instead of silently triggering the fallback.
        return SimpleTqdm(*args, **kwargs)
66 changes: 66 additions & 0 deletions setup_google_colab.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os


def download_github_code(path):
    """Download one repository file into the current directory via ``wget``.

    ``path`` is the file's location relative to the repository root. The file
    is saved under its base name, i.e. the part after the last ``/``.
    """
    # TODO replace with master after merge
    url = "https://raw.githubusercontent.com/hse-aml/natural-language-processing/colab/{}".format(path)
    target = path.split("/")[-1]
    os.system("wget {} -O {}".format(url, target))


def setup_common():
    """Install the pinned dependencies and fetch the shared ``common`` package.

    Runs one ``pip install`` per requirement, downloads the three ``common/``
    modules into the working directory, and moves them into a local
    ``common`` directory. Every ``setup_*`` entry point calls this, so it has
    to tolerate being run more than once in the same session.
    """
    requirements = [
        "tqdm",
        "backports.weakref==1.0.post1",
        "ChatterBot==0.7.6",
        "enum34==1.1.6",
        "funcsigs==1.0.2",
        "gensim==3.1.0",
        "jedi==0.11.0",
        "libarchive==0.4.4",
        "mock==2.0.0",
        "parso==0.1.0",
        "pbr==3.1.1",
        "regex==2017.11.9",
    ]
    for requirement in requirements:
        os.system("pip install {}".format(requirement))

    for module_path in (
        "common/download_utils.py",
        "common/tqdm_utils.py",
        "common/__init__.py",
    ):
        download_github_code(module_path)

    # -p: do not fail when ``common`` already exists from an earlier setup call.
    os.system("mkdir -p common")
    os.system("mv download_utils.py tqdm_utils.py __init__.py common/")


def setup_week1():
    """Run the common setup, then download the week 1 helper modules."""
    setup_common()
    for module_path in ("week1/grader.py", "week1/metrics.py"):
        download_github_code(module_path)


def setup_week2():
    """Run the common setup, then download the week 2 helper module."""
    setup_common()
    module_path = "week2/evaluation.py"
    download_github_code(module_path)


def setup_week3():
    """Run the common setup, then download the week 3 helper modules."""
    setup_common()
    for module_path in ("week3/grader.py", "week3/util.py"):
        download_github_code(module_path)


def setup_week4():
    """Run the common setup; week 4 downloads no additional files."""
    setup_common()


def setup_project():
    """Run the common setup, then download the final project modules."""
    setup_common()
    project_files = (
        "project/dialogue_manager.py",
        "project/main_bot.py",
        "project/utils.py",
    )
    for module_path in project_files:
        download_github_code(module_path)


def setup_honor():
    """Run the common setup, then download the honor assignment files."""
    setup_common()
    honor_files = (
        "honor/datasets.py",
        "honor/example.py",
        "honor/download_cornell.sh",
        "honor/download_opensubs.sh",
    )
    for file_path in honor_files:
        download_github_code(file_path)

0 comments on commit fa20699

Please sign in to comment.