# Patch overview: adds an Ansible role "traptor" and wires it into the playbook,
# the inventories and the CI matrix.
# NOTE(review): this copy of the diff is flattened (line breaks and indentation
# are collapsed to single spaces), so it is unlikely to apply as-is; treat it as
# a record of the change.
#
# Files touched:
#   .travis.yml              - adds TAGS='deps-traptor' to the env list.
#   production               - adds traptor-location-nodes to
#                              [traptor-nodes:children].
#   roles/pip/tasks/main.yml - the apt install of python-pip additionally sets
#                              update_cache: yes and cache_valid_time: 3600.
#   roles/traptor/defaults   - new: install paths, supervisor settings, log
#                              settings, Kafka topic, Redis port/db, and a
#                              placeholder `apikeys` list of empty strings.
#   roles/traptor/handlers   - new: "restart traptor" via supervisorctl.
#   roles/traptor/meta       - new: depends on roles pip, supervisord, miniconda.
#   roles/traptor/tasks      - new: conda-installs pip, pip-installs traptor (-U),
#                              links traptor_install_dir to traptor_miniconda_path,
#                              creates the log directory, renders the supervisord
#                              program config and localsettings.py.
#   roles/traptor/templates  - new: localsettings.py.j2 and
#                              traptor-supervisord.conf.j2.
#   site-infrastructure.yml  - adds the deps-traptor tag to the miniconda play and
#                              to two plays in the Zookeeper/Kafka section; adds a
#                              "Run Traptor role" play on traptor-track-nodes.
#   staging, testing         - remove the host from [traptor-follow-nodes], add a
#                              host under [traptor-track-nodes], add an empty
#                              [traptor-location-nodes] group and list it under
#                              [traptor-nodes:children].
#
# NOTE(review) - items to confirm before merging:
#   * localsettings.py.j2: the guard
#     `loop.index <= groups['traptor-nodes'] | length` sits inside a loop over
#     groups['traptor-nodes'], so it is true on every iteration and does not bound
#     the `apikeys[loop.index - 1]` lookup; presumably `apikeys | length` was
#     intended.
#   * site-infrastructure.yml: the TODO says "also test traptor-track-nodes"
#     although the new play already targets traptor-track-nodes; presumably
#     traptor-follow-nodes was meant.
#   * staging: the added host is `vagrant-as-02`, while the removed host and the
#     neighbouring entries are named `vagrant-ist-0N`; possible typo - verify.
#   * roles/traptor/tasks: the supervisord template src is built from
#     traptor_name, but the template file added here is literally named
#     traptor-supervisord.conf.j2, so overriding traptor_name would point src at
#     a file this patch does not add.
#   * roles/traptor/defaults: traptor_type, traptor_logger_name, traptor_log_file,
#     traptor_log_max_bytes and traptor_log_backups are not referenced by any
#     task or template in this patch.
diff --git a/.travis.yml b/.travis.yml index 824b890..10684c6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -27,6 +27,7 @@ env: - TAGS='miniconda' - TAGS='site-tangelo' - TAGS='mysql' + - TAGS='deps-traptor' matrix: fast_finish: true diff --git a/production b/production index 909282f..e3369cc 100644 --- a/production +++ b/production @@ -180,6 +180,7 @@ memex-crawler06.istresearch.com [traptor-nodes:children] traptor-follow-nodes traptor-track-nodes +traptor-location-nodes [other] in-namenode01.nj.istresearch.com diff --git a/roles/pip/tasks/main.yml b/roles/pip/tasks/main.yml index e447590..4d518bc 100644 --- a/roles/pip/tasks/main.yml +++ b/roles/pip/tasks/main.yml @@ -12,6 +12,8 @@ apt: name: python-pip state: present + update_cache: yes + cache_valid_time: 3600 tags: pip when: ansible_os_family == "Debian" diff --git a/roles/traptor/defaults/main.yml b/roles/traptor/defaults/main.yml new file mode 100644 index 0000000..f3a758d --- /dev/null +++ b/roles/traptor/defaults/main.yml @@ -0,0 +1,31 @@ +--- +# Installation config +traptor_name: traptor +traptor_install_dir: "/opt/{{ traptor_name }}" +traptor_miniconda_path: "{{ miniconda_install_dir|default('/opt/miniconda') }}/lib/python2.7/site-packages/{{ traptor_name }}" + +# Supervisor config +traptor_num_procs: 1 +traptor_start_secs: 5 +traptor_start_retries: 100 +traptor_stderr_logfile_maxbytes: 50MB + +traptor_logger_name: "{{ traptor_name }}" +traptor_log_dir: /var/log +traptor_log_file: "{{ traptor_name }}.log" +traptor_log_max_bytes: 25000000 +traptor_log_backups: 5 + +# Traptor localsettings.py +traptor_kafka_topic: "traptor" +traptor_type: "track" + +apikeys: + - consumer_key: '' + consumer_secret: '' + access_token: '' + access_token_secret: '' + +traptor_log_level: "INFO" +traptor_redis_port: 6379 +traptor_redis_db: 0 \ No newline at end of file diff --git a/roles/traptor/handlers/main.yml b/roles/traptor/handlers/main.yml new file mode 100644 index 0000000..3d639cf --- /dev/null +++ 
b/roles/traptor/handlers/main.yml @@ -0,0 +1,5 @@ +--- +- name: restart traptor + supervisorctl: + name={{ traptor_name }} + state=restarted \ No newline at end of file diff --git a/roles/traptor/meta/main.yml b/roles/traptor/meta/main.yml new file mode 100644 index 0000000..61e309e --- /dev/null +++ b/roles/traptor/meta/main.yml @@ -0,0 +1,5 @@ +--- +dependencies: + - { role: pip } + - { role: supervisord } + - { role: miniconda } \ No newline at end of file diff --git a/roles/traptor/tasks/main.yml b/roles/traptor/tasks/main.yml new file mode 100644 index 0000000..8f767f3 --- /dev/null +++ b/roles/traptor/tasks/main.yml @@ -0,0 +1,49 @@ +--- +- name: Install Miniconda packages + shell: "{{ miniconda_install_dir|default('/opt/miniconda') }}/bin/conda install {{ item }} --yes" + with_items: + - pip + tags: + - traptor + - miniconda + +- name: Miniconda pip install traptor + shell: "{{ miniconda_install_dir|default('/opt/miniconda') }}/bin/pip install traptor -U" + # notify: + # - restart wat-scrapy-crawling + tags: + - traptor + - miniconda + +- name: create traptor symlink + file: + src={{ traptor_miniconda_path }} + path={{ traptor_install_dir }} + state=link + mode=0744 + tags: traptor + +- name: create traptor log directory + file: + path={{ traptor_log_dir }}/ + state=directory + mode=0755 + tags: traptor + +- name: copy supervisord config + template: + src={{ traptor_name }}-supervisord.conf.j2 + dest={{ supervisord_programs_dir }}/{{ traptor_name }}-supervisord.conf + mode=0644 + notify: + - reread supervisord + tags: traptor + +- name: copy traptor settings + template: + src=localsettings.py.j2 + dest={{ traptor_install_dir }}/localsettings.py + mode=0644 + notify: + - restart traptor + tags: traptor diff --git a/roles/traptor/templates/localsettings.py.j2 b/roles/traptor/templates/localsettings.py.j2 new file mode 100644 index 0000000..7f3d8fa --- /dev/null +++ b/roles/traptor/templates/localsettings.py.j2 @@ -0,0 +1,46 @@ +LOG_LEVEL = "{{ 
traptor_log_level }}" +KAFKA_HOSTS = "{% for host in groups['kafka-nodes'] %}{{ host }}:{{ kafka_port|default(9092) }}{% if not loop.last %},{% endif %}{% endfor %}" +REDIS_HOST = "{{ groups['redis-master-node'][0] }}" +REDIS_PORT = {{ traptor_redis_port }} +REDIS_DB = {{ traptor_redis_db }} + +{% if inventory_hostname in groups['traptor-follow-nodes'] %} +# Twitter Streaming API 'follow' +{% for host in groups['traptor-follow-nodes'] %} +{%- if host == inventory_hostname -%} + +TRAPTOR_ID = {{ loop.index - 1}} +{%- endif -%} +{% endfor %} + +TRAPTOR_TYPE = 'follow' +{% endif %} + + +{% if inventory_hostname in groups['traptor-track-nodes'] %} +# Twitter Streaming API 'track' +{% for host in groups['traptor-track-nodes'] %} +{%- if host == inventory_hostname -%} + +TRAPTOR_ID = {{ loop.index - 1}} +{%- endif -%} +{% endfor %} + +TRAPTOR_TYPE = 'track' +{% endif %} + +KAFKA_TOPIC = "{{ traptor_kafka_topic }}" + +# Twitter API Keys +{% for host in groups['traptor-nodes'] %} +{% if loop.index <= groups['traptor-nodes'] | length %} +{%- if host == inventory_hostname -%} +APIKEYS = { + 'CONSUMER_KEY': "{{ apikeys[loop.index - 1]['consumer_key'] }}", + 'CONSUMER_SECRET': "{{ apikeys[loop.index - 1]['consumer_secret'] }}", + 'ACCESS_TOKEN': "{{ apikeys[loop.index - 1]['access_token'] }}", + 'ACCESS_TOKEN_SECRET': "{{ apikeys[loop.index - 1]['access_token_secret'] }}" +} +{%- endif -%} +{% endif %} +{% endfor %} diff --git a/roles/traptor/templates/traptor-supervisord.conf.j2 b/roles/traptor/templates/traptor-supervisord.conf.j2 new file mode 100644 index 0000000..6ca0fea --- /dev/null +++ b/roles/traptor/templates/traptor-supervisord.conf.j2 @@ -0,0 +1,14 @@ +[program:{{ traptor_name }}] +command={{ miniconda_install_dir|default('/opt/miniconda') }}/bin/python {{ traptor_install_dir }}/traptor.py --info --delay=60 +directory={{ traptor_install_dir }} +process_name=%(program_name)s +numprocs={{ traptor_num_procs }} +autostart=true +autorestart=true +exitcodes=0,2,3 
+startsecs={{ traptor_start_secs }} +startretries={{ traptor_start_retries }} +stopsignal=KILL +redirect_stderr=true +stderr_logfile=AUTO +stderr_logfile_maxbytes={{ traptor_stderr_logfile_maxbytes }} diff --git a/site-infrastructure.yml b/site-infrastructure.yml index 8589713..ade0627 100644 --- a/site-infrastructure.yml +++ b/site-infrastructure.yml @@ -34,7 +34,9 @@ hosts: all roles: - miniconda - tags: miniconda + tags: + - miniconda + - deps-traptor # -------------------------- Zookeeper and Kafka ----------------------------- - name: Run zookeeper role @@ -50,6 +52,7 @@ - deps-hbase - deps-spark - deps-scrapy-cluster + - deps-traptor - name: Run kafka role hosts: kafka-nodes @@ -61,6 +64,7 @@ - site-kafka - deps-kafka - deps-scrapy-cluster + - deps-traptor # ------------------------------ ELK stack ------------------------------------ - name: Run elasticsearch master role @@ -381,3 +385,13 @@ roles: [ tangelo ] tags: - site-tangelo + +# -------------------------------- Traptor ----------------------------------- +# TODO also test traptor-track-nodes and traptor-location-nodes + +- name: Run Traptor role + hosts: traptor-track-nodes + roles: [ traptor ] + tags: + - site-traptor + - deps-traptor diff --git a/staging b/staging index 00bd01f..070f0a4 100644 --- a/staging +++ b/staging @@ -137,13 +137,16 @@ vagrant-ist-02 vagrant-ist-01 [traptor-follow-nodes] -vagrant-ist-02 [traptor-track-nodes] +vagrant-as-02 + +[traptor-location-nodes] [traptor-nodes:children] traptor-follow-nodes traptor-track-nodes +traptor-location-nodes [docker-engine] vagrant-ist-01 diff --git a/testing b/testing index b1a8d49..53990bf 100644 --- a/testing +++ b/testing @@ -132,13 +132,16 @@ travis-trusty travis-trusty [traptor-follow-nodes] -travis-trusty [traptor-track-nodes] +travis-trusty + +[traptor-location-nodes] [traptor-nodes:children] traptor-follow-nodes traptor-track-nodes +traptor-location-nodes [docker-engine] travis-trusty