Skip to content

Commit

Permalink
Merge branch 'feature-download-bundle' into develop
Browse files Browse the repository at this point in the history
  • Loading branch information
looselycoupled committed Dec 13, 2016
2 parents 647e234 + 646bb79 commit 7a238cc
Show file tree
Hide file tree
Showing 9 changed files with 372 additions and 24 deletions.
14 changes: 13 additions & 1 deletion dataset/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

from django.contrib import admin
from dataset.models import Dataset
from dataset.models import DatasetVersion
from dataset.models import DataFile
from dataset.models import License

Expand All @@ -37,12 +38,23 @@ class DataFilesInline(admin.StackedInline):
verbose_name_plural = 'files'


class DatasetVersionsInline(admin.StackedInline):
    """
    Stacked inline form for editing DatasetVersion rows from within
    another model's admin page.
    """

    # Label used for the inline section in the admin.
    verbose_name_plural = 'versions'
    # Number of extra blank forms offered in addition to existing rows.
    extra = 1
    model = DatasetVersion


class DatasetAdmin(admin.ModelAdmin):
    """
    Defines the administration for a dataset in the CMS.

    The dataset's versions are edited inline on the dataset page.
    """

    # ModelAdmin only reads ``inlines``; an attribute with any other name
    # (such as ``versions``) is silently ignored, so the versions inline must
    # be registered here to appear in the admin.
    # DataFilesInline is deliberately not listed: DataFile points at
    # DatasetVersion (not Dataset), so it cannot be an inline of Dataset.
    inlines = (DatasetVersionsInline,)

##########################################################################
## Register Admin
Expand Down
31 changes: 25 additions & 6 deletions dataset/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
##########################################################################

from django import forms
from dataset.models import Dataset, DataFile
from dataset.models import Dataset, DataFile, DatasetVersion
from dataset.signals import bundle_version


##########################################################################
Expand All @@ -43,7 +44,8 @@ def save(self):
Save the dataset with the new meta information.
"""
self.cleaned_data['owner'] = self.request.user.profile.account
return Dataset.objects.create(**self.cleaned_data)
dataset = Dataset.objects.create(**self.cleaned_data)
return dataset


##########################################################################
Expand Down Expand Up @@ -86,8 +88,25 @@ def save(self):
"""
Associate the file with the dataset and upload to S3.
"""
return DataFile.objects.create(
dataset = self.cleaned_data['dataset'],
uploader = self.request.user,
data = self.cleaned_data['datafile']
files = []
dataset = self.cleaned_data['dataset']
if dataset.latest_version():
files = dataset.latest_version().files.all()

version = DatasetVersion.objects.create(
version=dataset.next_version_number(),
dataset=dataset,
bundle_available=False,
)
version.files = files

datafile = DataFile.objects.create(
version=version,
uploader=self.request.user,
data=self.cleaned_data['datafile']
)

# emit signal for version bundle
bundle_version.send(sender=None, instance=version)

return datafile
52 changes: 52 additions & 0 deletions dataset/migrations/0004_auto_20160814_1553.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.9.5 on 2016-08-14 19:53
from __future__ import unicode_literals

from django.db import migrations, models
import django.db.models.deletion
import django.utils.timezone
import model_utils.fields


class Migration(migrations.Migration):
    """
    Introduce the DatasetVersion model and hang DataFile off it.

    After this migration a DataFile belongs to a DatasetVersion, which in
    turn belongs to a Dataset; the integer ``version`` column on Dataset and
    the direct DataFile -> Dataset foreign key are removed.
    """

    # Must be applied after the previous dataset migration.
    dependencies = [
        ('dataset', '0003_auto_20160722_0124'),
    ]

    operations = [
        # 1. Create the dataset_versions table (timestamps, version number,
        #    bundle flag). Rows are ordered newest-first by ``created``.
        migrations.CreateModel(
            name='DatasetVersion',
            fields=[
                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('created', model_utils.fields.AutoCreatedField(default=django.utils.timezone.now, editable=False, verbose_name='created')),
                ('modified', model_utils.fields.AutoLastModifiedField(default=django.utils.timezone.now, editable=False, verbose_name='modified')),
                ('version', models.PositiveIntegerField()),
                ('bundle_available', models.BooleanField(default=False)),
            ],
            options={
                'db_table': 'dataset_versions',
                'ordering': ('-created',),
                'get_latest_by': 'created',
            },
        ),
        # 2. Drop the direct DataFile -> Dataset link.
        migrations.RemoveField(
            model_name='datafile',
            name='dataset',
        ),
        # 3. Drop the integer version column from Dataset.
        migrations.RemoveField(
            model_name='dataset',
            name='version',
        ),
        # 4. Link each DatasetVersion to its Dataset (reverse accessor:
        #    ``dataset.versions``); deleting a dataset cascades.
        migrations.AddField(
            model_name='datasetversion',
            name='dataset',
            field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='versions', to='dataset.Dataset'),
        ),
        # 5. Link each DataFile to a DatasetVersion (reverse accessor:
        #    ``version.files``).
        # NOTE(review): with preserve_default=False, default=1 is a one-off
        # value for rows that already exist when this runs -- confirm that a
        # DatasetVersion with pk=1 exists by then (or that there are no
        # DataFile rows), otherwise those rows reference a missing version.
        migrations.AddField(
            model_name='datafile',
            name='version',
            field=models.ForeignKey(default=1, on_delete=django.db.models.deletion.CASCADE, related_name='files', to='dataset.DatasetVersion'),
            preserve_default=False,
        ),
    ]
22 changes: 22 additions & 0 deletions dataset/migrations/0005_datasetversion_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.9.5 on 2016-08-15 01:46
from __future__ import unicode_literals

import dataset.models
from django.db import migrations, models


class Migration(migrations.Migration):
    """
    Add the ``data`` file field to DatasetVersion.
    """

    # Requires the DatasetVersion table created in 0004.
    dependencies = [
        ('dataset', '0004_auto_20160814_1553'),
    ]

    operations = [
        # The upload path is computed by
        # dataset.models.dataset_version_directory_path.
        # NOTE(review): default=None with preserve_default=False is a one-off
        # value for pre-existing DatasetVersion rows -- presumably none exist
        # when this runs; confirm before applying to a populated database.
        migrations.AddField(
            model_name='datasetversion',
            name='data',
            field=models.FileField(default=None, upload_to=dataset.models.dataset_version_directory_path),
            preserve_default=False,
        ),
    ]
159 changes: 153 additions & 6 deletions dataset/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,17 +20,26 @@
from __future__ import unicode_literals

import os
import zipfile
import zlib
import shutil
import random
import string

from django.core.exceptions import ObjectDoesNotExist

from django.db import models
from model_utils import Choices
from trinket.utils import nullable
from trinket.utils import (nullable, memoized)
from markupfield.fields import MarkupField
from model_utils.models import TimeStampedModel
from django.core.urlresolvers import reverse
from django.core.files.base import ContentFile
import csv
import codecs

# from dataset.signals import bundle_version

##########################################################################
## Helper Models
##########################################################################
Expand Down Expand Up @@ -62,7 +71,7 @@ class Dataset(TimeStampedModel):
PRIVACY = Choices('private', 'protected', 'public')

owner = models.ForeignKey('account.Account', related_name='datasets')
version = models.PositiveIntegerField(default=1, null=False, blank=True)
# version = models.PositiveIntegerField(default=1, null=False, blank=True)
name = models.SlugField(max_length=60, null=False, allow_unicode=True)
description = models.CharField(max_length=255, **nullable)
url = models.URLField(**nullable)
Expand All @@ -76,8 +85,29 @@ class Meta:
ordering = ('-created',)
get_latest_by = 'created'

@property
def version(self):
    """
    Version number of this dataset's most recent DatasetVersion.

    Returns None when the dataset has no versions yet, instead of letting
    the underlying ``latest()`` lookup raise DoesNotExist. This matches how
    ``latest_version`` and ``latest_file`` report a missing version.
    """
    latest = self.latest_version()
    return latest.version if latest is not None else None

def next_version_number(self):
    """
    Return the number the next DatasetVersion should carry: one more than
    the latest version's number, or 1 when there is no version yet.
    """
    current = self.latest_version()
    if not current:
        return 1
    return current.version + 1

def latest_version(self):
    """
    Return the most recent DatasetVersion of this dataset, or None when
    the dataset has no versions.
    """
    try:
        newest = self.versions.latest()
    except ObjectDoesNotExist:
        # No version rows exist for this dataset.
        newest = None
    return newest

def latest_file(self):
    """
    Return the most recent DataFile of the latest version, or None when
    the dataset has no versions.
    """
    # Files are attached to a DatasetVersion (reverse accessor ``files``),
    # not to the Dataset itself, so the lookup goes through the latest
    # version. The version is fetched once and reused.
    version = self.latest_version()
    if not version:
        return None
    return version.files.latest()

def get_api_detail_url(self):
"""
Expand All @@ -98,6 +128,123 @@ def is_starred(self, user_id):
def __str__(self):
    """
    Use the dataset's name as its string representation.
    """
    return self.name


##########################################################################
## Dataset Version
##########################################################################

def dataset_version_directory_path(instance, filename):
    """
    Build the upload path for a version's zip bundle:
        MEDIA_ROOT/datasets/<account>/<dataset>/bundles/<name>.zip

    The ``filename`` argument is ignored; the file name always comes from
    ``instance.bundle_filename``.
    """
    segments = (
        'datasets',
        instance.dataset.owner.name,
        instance.dataset.name,
        'bundles',
        instance.bundle_filename,
    )
    return os.path.join(*segments)

class DatasetVersion(TimeStampedModel):
    """
    A join model to connect DataFiles to Datasets tied to a version number.

    A version can also hold a zip bundle of its files in ``data``; the
    bundle is produced by ``bundle()`` and ``bundle_available`` records
    whether that has happened.
    """

    version = models.PositiveIntegerField(null=False, blank=False)
    dataset = models.ForeignKey('dataset.Dataset', related_name='versions')
    bundle_available = models.BooleanField(default=False)
    data = models.FileField(upload_to=dataset_version_directory_path)

    class Meta:
        db_table = "dataset_versions"
        get_latest_by = 'created'
        ordering = ('-created',)

    @property
    def bundle_filename(self):
        """
        Constructs the bundle filename based off of dataset name and
        version number, e.g. ``<dataset name>-v<version>.zip``.
        """
        return '{}-{}.zip'.format(
            self.dataset.name,
            self.__str__()
        )

    # NOTE(review): bundle() reads ``self.temp_directory`` without calling
    # it, so ``memoized`` presumably exposes this as a cached attribute --
    # confirm against trinket.utils.
    @memoized
    def temp_directory(self):
        """
        Creates a fresh temporary working directory and returns its
        absolute path.
        """
        # mkdtemp picks an unused name and creates the directory atomically
        # with owner-only permissions, which avoids the check-then-create
        # race of generating a random name by hand.
        import tempfile
        return tempfile.mkdtemp(prefix='cultivar-')

    def bundle(self):
        """
        Public method to download files, bundle them, upload the bundle to s3
        and then update the version record accordingly.

        The temporary working directory is removed afterwards whether or
        not bundling succeeded; any exception from a step propagates.
        """
        # Resolve the directory before entering the try block: if it cannot
        # be created there is nothing to clean up.
        workdir = self.temp_directory
        try:
            self._download(workdir)
            self._zip(workdir)
            self._upload(workdir)

            self.bundle_available = True
            self.save()
        finally:
            self._clean(workdir)

    def _clean(self, path):
        """
        Removes the temporary work directory and everything in it.
        """
        shutil.rmtree(path)

    def _zip(self, path):
        """
        Creates the zip file in ``path``, then adds the dataset's readme and
        license plus every file already present in ``path``.
        """
        # Take the directory listing before the archive is created so the
        # zip file is not added to itself.
        members = [os.path.join(path, name) for name in os.listdir(path)]
        archive_path = os.path.join(path, self.bundle_filename)

        # The context manager closes the archive even if a write fails.
        with zipfile.ZipFile(archive_path, mode='w') as zf:
            zf.writestr('readme.md', self.dataset.readme.raw)
            zf.writestr('license.txt', self.dataset.license.text)
            for member in members:
                zf.write(
                    member,
                    compress_type=zipfile.ZIP_DEFLATED,
                    arcname=os.path.basename(member)
                )

    def _upload(self, path):
        """
        Stores the bundle file found in the given directory in the ``data``
        field. The model itself is not saved here (``save=False``); the
        caller is expected to save it.
        """
        upload_path = dataset_version_directory_path(self, '')
        # Read the bundle inside a ``with`` block so the local file handle
        # is closed before the temporary directory is removed.
        with open(os.path.join(path, self.bundle_filename), 'rb') as fh:
            content = ContentFile(fh.read())
        self.data.save(upload_path, content, save=False)

    def _download(self, path):
        """
        Downloads this version's files to the given path.

        Each file is written under its base name, so two files sharing a
        base name would overwrite one another.
        """
        for datafile in self.files.all():
            filename = os.path.basename(datafile.data.name)
            with open(os.path.join(path, filename), 'wb') as fh:
                fh.write(datafile.data.read())

    def __str__(self):
        """
        Short label for the version, e.g. ``v3``.
        """
        return 'v{}'.format(self.version)



##########################################################################
## Data Files
##########################################################################
Expand All @@ -109,8 +256,8 @@ def dataset_directory_path(instance, filename):
"""
return os.path.join(
'datasets',
instance.dataset.owner.name,
instance.dataset.name,
instance.version.dataset.owner.name,
instance.version.dataset.name,
filename
)

Expand All @@ -123,7 +270,7 @@ class DataFile(TimeStampedModel):
DATATYPE = Choices('csv', 'json', 'xml')

uploader = models.ForeignKey('auth.User', related_name='+')
dataset = models.ForeignKey('dataset.Dataset', related_name='files')
version = models.ForeignKey('dataset.DatasetVersion', related_name='files')
data = models.FileField(upload_to=dataset_directory_path)
description = models.CharField(max_length=128, **nullable)
dimensions = models.PositiveIntegerField(default=0)
Expand Down
Loading

0 comments on commit 7a238cc

Please sign in to comment.