forked from hse-aml/natural-language-processing
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
6 changed files
with
158 additions
and
20 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -102,3 +102,5 @@ ENV/ | |
|
||
# Data for assignments | ||
data/ | ||
|
||
.idea |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
# Natural Language Processing course resources | ||
https://www.coursera.org/learn/language-processing | ||
|
||
## Running on Google Colab | ||
Google has released its own flavour of Jupyter called Colab, which has free GPUs! | ||
|
||
Here's how you can use it: | ||
1. Open https://colab.research.google.com | ||
2. Click the **GITHUB** tab, paste https://github.com/hse-aml/natural-language-processing and press Enter | ||
3. Choose the notebook you want to open, e.g. week1/week1-MultilabelClassification.ipynb | ||
4. Click **File -> Save a copy in Drive...** to save your progress in Google Drive | ||
5. Click **Runtime -> Change runtime type** and select **GPU** in Hardware accelerator box | ||
6. Execute the following code in the first cell: | ||
```python | ||
! wget https://raw.githubusercontent.com/hse-aml/natural-language-processing/colab/setup_google_colab.py -O setup_google_colab.py | ||
import setup_google_colab | ||
setup_google_colab.setup_week1() # change to the week you're working on | ||
``` | ||
7. If you run many notebooks on Colab, they can continue to eat up memory. | ||
You can kill them with `! pkill -9 python3` and check with `! nvidia-smi` that the GPU memory is freed. | ||
|
||
**Known issues:** | ||
* No support for `ipywidgets`, so we cannot use fancy `tqdm` progress bars. | ||
For now, we use a simplified version of a progress bar suitable for Colab. | ||
* Blinking animation with `IPython.display.clear_output()`. | ||
It's usable, but we are still looking for a workaround. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
#!/usr/bin/env python | ||
# -*- coding: utf-8 -*- |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
#!/usr/bin/env python | ||
# -*- coding: utf-8 -*- | ||
from __future__ import print_function | ||
|
||
|
||
class SimpleTqdm(object):
    """Minimal text progress bar used as a stand-in for ``tqdm``.

    Progress is shown by printing one ``*`` every ``print_frequency`` steps,
    where ``print_frequency`` is ``max(total // 50, 1)``. The description is
    printed on its own line by ``close()``.

    Args:
        iterable: optional iterable to wrap. It is materialized into a list
            so that its length is known up front.
        total: expected number of steps; only used when ``iterable`` is None.
        **kwargs: accepted and ignored so tqdm-style call sites keep working.

    Raises:
        AssertionError: if neither ``iterable`` nor ``total`` is given.
    """

    def __init__(self, iterable=None, total=None, **kwargs):
        self.iterable = list(iterable) if iterable is not None else None
        self.total = len(self.iterable) if self.iterable is not None else total
        assert self.iterable is not None or self.total is not None, \
            "either iterable or total must be provided"
        self.current_step = 0
        # Initialized here so that next() works even before iter() is called.
        self.index = 0
        self.print_frequency = max(self.total // 50, 1)
        self.desc = ""

    def set_description_str(self, desc):
        """Store the description printed by ``close()``."""
        self.desc = desc

    def set_description(self, desc):
        """Store the description printed by ``close()``."""
        self.desc = desc

    def update(self, steps=1):
        """Advance the bar by ``steps`` and print any newly earned stars.

        ``steps`` defaults to 1 to match ``tqdm.update()``, so callers that
        invoke ``update()`` without arguments work with this fallback too.
        """
        frequency = self.print_frequency
        new_step = self.current_step + steps
        # Number of print_frequency boundaries crossed by this update.
        stars = int(new_step // frequency - self.current_step // frequency)
        if stars > 0:
            print("*" * stars, end='')
        self.current_step = new_step

    def close(self):
        """Finish the star line and print the description."""
        print("\n" + self.desc)

    def __iter__(self):
        assert self.iterable is not None
        self.index = 0
        return self

    def __next__(self):
        if self.index < self.total:
            element = self.iterable[self.index]
            self.update(1)
            self.index += 1
            return element
        else:
            # Exhausted: close the bar before signalling the end of iteration.
            self.close()
            raise StopIteration

    # Python 2 looks up ``next`` rather than ``__next__`` on iterators.
    next = __next__
|
||
|
||
def tqdm_notebook_failsafe(*args, **kwargs):
    """Return a notebook ``tqdm`` progress bar, or ``SimpleTqdm`` if that fails.

    All positional and keyword arguments are forwarded unchanged to whichever
    implementation ends up being used.
    """
    try:
        import tqdm
        tqdm.monitor_interval = 0  # workaround for https://github.com/tqdm/tqdm/issues/481
        return tqdm.tqdm_notebook(*args, **kwargs)
    except Exception:
        # tqdm is broken on Google Colab.
        # Catch Exception rather than everything so that KeyboardInterrupt and
        # SystemExit still propagate instead of silently triggering the fallback.
        return SimpleTqdm(*args, **kwargs)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
#!/usr/bin/env python | ||
# -*- coding: utf-8 -*- | ||
import os | ||
|
||
|
||
def download_github_code(path):
    """Fetch one file of the course repository into the working directory.

    ``path`` is the file's location inside the repository; the file is saved
    locally under the last component of that path. The download is done by
    shelling out to ``wget``; its exit status is not checked.
    """
    # TODO replace with master after merge
    command_template = "wget https://raw.githubusercontent.com/hse-aml/natural-language-processing/colab/{} -O {}"
    local_name = path.split("/")[-1]
    os.system(command_template.format(path, local_name))
|
||
|
||
def setup_common():
    """Install the pip dependencies and fetch the shared ``common`` package.

    Runs each ``pip install`` as a separate shell command, downloads the three
    ``common/`` helper modules, then moves them into a local ``common``
    directory. Exit statuses of the shell commands are not checked.
    """
    pip_commands = (
        "pip install tqdm",
        "pip install backports.weakref==1.0.post1",
        "pip install ChatterBot==0.7.6",
        "pip install enum34==1.1.6",
        "pip install funcsigs==1.0.2",
        "pip install gensim==3.1.0",
        "pip install jedi==0.11.0",
        "pip install libarchive==0.4.4",
        "pip install mock==2.0.0",
        "pip install parso==0.1.0",
        "pip install pbr==3.1.1",
        "pip install regex==2017.11.9",
    )
    for command in pip_commands:
        os.system(command)

    helper_paths = (
        "common/download_utils.py",
        "common/tqdm_utils.py",
        "common/__init__.py",
    )
    for helper_path in helper_paths:
        download_github_code(helper_path)

    # The helpers were saved into the working directory; gather them into
    # a ``common`` directory so they can be imported as a package.
    os.system("mkdir common")
    os.system("mv download_utils.py tqdm_utils.py __init__.py common/")
|
||
|
||
def setup_week1():
    """Run the common setup, then download the week 1 helper modules."""
    setup_common()
    for repo_path in ("week1/grader.py", "week1/metrics.py"):
        download_github_code(repo_path)
|
||
|
||
def setup_week2():
    """Run the common setup, then download the week 2 evaluation module."""
    setup_common()
    evaluation_path = "week2/evaluation.py"
    download_github_code(evaluation_path)
|
||
|
||
def setup_week3():
    """Run the common setup, then download the week 3 helper modules."""
    setup_common()
    for repo_path in ("week3/grader.py", "week3/util.py"):
        download_github_code(repo_path)
|
||
|
||
def setup_week4():
    """Run the common setup; week 4 downloads no extra files."""
    setup_common()
|
||
|
||
def setup_project():
    """Run the common setup, then download the final project modules."""
    setup_common()
    project_files = (
        "project/dialogue_manager.py",
        "project/main_bot.py",
        "project/utils.py",
    )
    for repo_path in project_files:
        download_github_code(repo_path)
|
||
|
||
def setup_honor():
    """Run the common setup, then download the honor assignment files."""
    setup_common()
    honor_files = (
        "honor/datasets.py",
        "honor/example.py",
        "honor/download_cornell.sh",
        "honor/download_opensubs.sh",
    )
    for repo_path in honor_files:
        download_github_code(repo_path)