From a30b0f38b3e19fa18f453e85b7869bfbf0009b28 Mon Sep 17 00:00:00 2001 From: jcollopy-tulane Date: Sat, 27 Apr 2024 15:36:28 -0500 Subject: [PATCH] working --- nlp/__init__.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/nlp/__init__.py b/nlp/__init__.py index 1f5784f..49392e1 100644 --- a/nlp/__init__.py +++ b/nlp/__init__.py @@ -14,14 +14,16 @@ # such as where data will be downloaded from. # here is an example. def write_default_config(path): - with open(path, 'wt') as w: - w.write('[data]\n') - w.write('url1 = https://raw.githubusercontent.com/tulane-cmps6730/project-reddit/main/data/train.csv\n') - w.write('file1 = %s%s%s\n' % (nlp_path, os.path.sep, 'train.csv')) - w.write('url2 = https://raw.githubusercontent.com/tulane-cmps6730/project-reddit/main/data/test.csv\n') - w.write('file2 = %s%s%s\n' % (nlp_path, os.path.sep, 'test.csv')) - w.write('url3 = https://raw.githubusercontent.com/tulane-cmps6730/project-reddit/main/data/validation.csv\n') - w.write('file3 = %s%s%s\n' % (nlp_path, os.path.sep, 'validation.csv')) + for i in range(1, 4): # Iterate over url1, url2, url3 + data_url = config.get('data', f'url{i}', fallback=None) # Fetch the 'url{i}' option + if data_url is not None: + data_file = config.get('data', f'file{i}') + print('downloading from %s to %s' % (data_url, data_file)) + r = requests.get(data_url) + with open(data_file, 'wt') as f: + f.write(r.text) + else: + print(f"No URL found for 'url{i}' in configuration.") # Find NLP_HOME path if 'NLP_HOME' in os.environ: