Skip to content

Commit

Permalink
reworking some load_episodes arguments
Browse files: browse the repository at this point in the history
  • Loading branch information
lbjay committed Oct 19, 2017
1 parent: 25b2ee0; commit: 54777af
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 24 deletions.
21 changes: 10 additions & 11 deletions harvest_cli/ocua.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,16 +250,13 @@ def get_episode(action):
help='Matterhorn rest password', required=True)
@click.option('-e', '--es-host', envvar='ES_HOST',
help="Elasticsearch host:port", default='localhost:9200')
@click.option('-i', '--es-index', envvar='EPISODE_INDEX_NAME', default='episodes',
help="Name of the index for storing episode records")
@click.option('-n', '--index-pattern', default='useractions-*',
help="useraction index pattern to query for mpids")
@click.option('--term', help="term query for mpids to index")
@click.option('--year', help="year query for mpids to index")
@click.option('-t', '--target-index', default="episodes",
help="name of index the episodes will be written to; defaults to 'episodes'")
@click.option('-s', '--source-index-pattern', help=("useraction index pattern to query for mpids; "
"e.g. 'useractions*-2017.10.*'; defaults to yesterday's index"))
@click.option('--mpid', help="Index a specific mediapackage")
@click.option('-w', '--wait', default=1,
help="Seconds to wait between batch requests")
def load_episodes(admin_host, engage_host, user, password, es_host, es_index, index_pattern, mpid, term, year, wait):
@click.option('-w', '--wait', default=1, help="Seconds to wait between batch requests")
def load_episodes(admin_host, engage_host, user, password, es_host, target_index, source_index_pattern, mpid, wait):

mh_engage = pyhorn.MHClient('http://' + engage_host, user, password, timeout=30)
mh_admin = pyhorn.MHClient('http://' + admin_host, user, password, timeout=30)
Expand All @@ -268,7 +265,9 @@ def load_episodes(admin_host, engage_host, user, password, es_host, es_index, in
if mpid is not None:
mpids = [mpid]
else:
mpids = get_mpids_from_useractions(es, index_pattern, term, year)
if source_index_pattern is None:
source_index_pattern = "useractions*-%s" % arrow.now().replace(days=-1).format("YYYY.MM.DD")
mpids = get_mpids_from_useractions(es, source_index_pattern)

for mpid in mpids:
request_params = {
Expand Down Expand Up @@ -370,7 +369,7 @@ def load_episodes(admin_host, engage_host, user, password, es_host, es_index, in
except Exception, e:
logger.error("Failed extracting workflow data for episode %s: %s", ep.id, str(e))

es.index(index=es_index,
es.index(index=target_index,
doc_type='episode',
id=mpid,
body=doc
Expand Down
14 changes: 3 additions & 11 deletions harvest_cli/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@

import logging
import json
from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search, Q

Expand Down Expand Up @@ -40,19 +39,12 @@ def get_series_for_term(es, term, year):
res = s.execute()
return [x['key'] for x in res.aggregations.series.buckets]

def get_mpids_from_useractions(es, index_pattern=None,
term=None, year=None, anonymous=True):
def get_mpids_from_useractions(es, index_pattern):
s = Search(using=es, index=index_pattern).extra(size=0)
if None not in (term, year):
s = s.filter(
Q('term', **{'episode.term': term }) & \
Q('term', ** { 'episode.year': year })
)
if not anonymous:
s = s.filter(~Q('term', huid='anonymous'))

s.aggs.bucket('mpids', 'terms', field='mpid', size=0)
res = s.execute()
if 'aggregations' not in res:
return []
return [x['key'] for x in res.aggregations.mpids.buckets]


2 changes: 1 addition & 1 deletion index_templates/episodes.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"template" : "episodes-*",
"template" : "episodes*",
"mappings" : {
"episode" : {
"_all" : {"enabled" : false},
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ click==6.7
pyhorn==0.8.1
pyloggly==0.1.0
python-dotenv==0.7.1
requests[security]==2.18.4
requests[security]==2.8.1
redis==2.10.6

# Elasticsearch 2.x
Expand Down

0 comments on commit 54777af

Please sign in to comment.