Source code for bioblend.galaxy.datasets
"""
Contains possible interactions with the Galaxy Datasets
"""
from bioblend.galaxy.client import Client
import shutil
import urllib2
import os
import time
import logging
# Module-level logger, named after this module's import path.
log = logging.getLogger(__name__)
class DatasetClient(Client):
    """
    Client for interacting with Galaxy datasets.

    Requests are issued through the parent ``Client`` using the
    ``'datasets'`` module name.
    """

    def __init__(self, galaxy_instance):
        # ``module`` is assigned before delegating to the parent initialiser
        # (presumably Client.__init__ reads it -- confirm against Client).
        self.module = 'datasets'
        super(DatasetClient, self).__init__(galaxy_instance)

    def show_dataset(self, dataset_id, deleted=False):
        """
        Display information about and/or content of a dataset. This can be a
        history or a library dataset.

        :type dataset_id: string
        :param dataset_id: Encoded Dataset ID

        :type deleted: boolean
        :param deleted: Passed through unchanged to the underlying GET request.

        :return: Whatever ``Client._get`` returns for this dataset. Other
                 methods of this class read the ``state``, ``name`` and
                 ``download_url`` keys from it.
        """
        return Client._get(self, id=dataset_id, deleted=deleted)

    def download_dataset(self, dataset_id, file_path=None, use_default_filename=True, wait_for_completion=False, maxwait=12000):
        """
        Downloads the dataset identified by 'id'.

        :type dataset_id: string
        :param dataset_id: Encoded Dataset ID

        :type file_path: string
        :param file_path: If the file_path argument is provided, the dataset will be streamed to disk
                          at that path (should not contain a filename if use_default_filename=True).
                          If the file_path argument is not provided, the dataset content is loaded into memory
                          and returned by the method (memory consumption may be heavy as the entire file
                          will be in memory).

        :type use_default_filename: boolean
        :param use_default_filename: If True, the exported file will be saved as file_path/%s,
                                     where %s is the dataset name.
                                     If False, file_path is assumed to contain the full file path
                                     including the filename.

        :type wait_for_completion: boolean
        :param wait_for_completion: If True, this call will block until the dataset state is
                                    'ok' or 'error' (or until maxwait has elapsed).

        :type maxwait: float
        :param maxwait: Time (in seconds) to wait for the dataset to complete.
                        If the dataset is not complete within this time, a
                        DatasetTimeoutException will be raised. Only used when
                        wait_for_completion is True; must then be greater than the
                        polling interval (30 seconds).

        :return: If file_path is not provided, the ``content`` attribute of the
                 response to the download request. Otherwise returns nothing.

        :raises DatasetStateException: if the dataset state is not 'ok'.
        :raises DatasetTimeoutException: if waiting was requested and timed out.
        """
        if wait_for_completion:
            self._block_until_dataset_ready(dataset_id, maxwait=maxwait)

        dataset = self.show_dataset(dataset_id)
        if dataset['state'] != 'ok':
            raise DatasetStateException("Dataset not ready. Dataset id: %s, current state: %s" % (dataset_id, dataset['state']))

        # Build the download URL from the Galaxy base URL and the dataset's
        # own 'download_url' entry.
        url = '/'.join([self.gi.base_url, dataset['download_url']])

        if file_path is None:
            # In-memory download: the whole content is held in memory.
            r = self.gi.make_get_request(url)
            return r.content

        # Resolve the destination before opening the connection so that
        # nothing is left open if the path computation fails.
        if use_default_filename:
            file_local_path = os.path.join(file_path, dataset['name'])
        else:
            file_local_path = file_path

        req = urllib2.urlopen(url)
        try:
            with open(file_local_path, 'wb') as fp:
                shutil.copyfileobj(req, fp)
        finally:
            # The urllib2 response is not a context manager; close it
            # explicitly so the connection is released even on write errors.
            req.close()

    def _is_dataset_complete(self, dataset_id):
        """
        Return True if the dataset state is 'ok' or 'error', False otherwise.
        """
        state = self.show_dataset(dataset_id)['state']
        return state in ('ok', 'error')

    def _block_until_dataset_ready(self, dataset_id, maxwait=12000, interval=30, raise_on_timeout=True):
        """
        Wait until the dataset state changes to ok or error.
        Based on: https://github.com/salimfadhley/jenkinsapi/blob/master/jenkinsapi/api.py

        :type dataset_id: string
        :param dataset_id: Encoded Dataset ID

        :type maxwait: float
        :param maxwait: Maximum total time (in seconds) to wait; must be
                        greater than interval.

        :type interval: float
        :param interval: Time (in seconds) to sleep between polls; must be
                         greater than 0.

        :type raise_on_timeout: boolean
        :param raise_on_timeout: If True, raise DatasetTimeoutException when
                                 the dataset is still incomplete after maxwait
                                 seconds; if False, return silently instead.

        :raises ValueError: if interval <= 0 or maxwait <= interval.
        :raises DatasetTimeoutException: on timeout when raise_on_timeout is True.
        """
        # Explicit checks instead of ``assert`` so validation still happens
        # when Python runs with -O.
        if interval <= 0:
            raise ValueError("interval must be greater than 0, got %r" % (interval,))
        if maxwait <= interval:
            raise ValueError("maxwait (%r) must be greater than interval (%r)" % (maxwait, interval))

        # Manual countdown (rather than xrange) so non-integer waits work.
        time_left = maxwait
        while time_left > 0:
            if self._is_dataset_complete(dataset_id):
                return
            log.warning("Waiting for dataset %s to complete. Will wait another %is", dataset_id, time_left)
            # Never sleep past the deadline.
            time.sleep(min(interval, time_left))
            time_left -= interval

        # The dataset may have completed during the final sleep; check once
        # more before declaring a timeout.
        if self._is_dataset_complete(dataset_id):
            return
        if raise_on_timeout:
            raise DatasetTimeoutException("Waited too long for dataset to complete: %s" % dataset_id)
class DatasetStateException(Exception):
    """
    Raised when a dataset is not in the state required for an operation.

    The message is available as ``value`` and, because it is also passed to
    the base ``Exception``, as ``args[0]``.
    """

    def __init__(self, value):
        # Forward the message to Exception so ``args`` is populated; without
        # this, ``args`` is empty and the instance cannot be pickled/copied.
        super(DatasetStateException, self).__init__(value)
        self.value = value

    def __str__(self):
        # Kept as the repr of the message for backward compatibility.
        return repr(self.value)
class DatasetTimeoutException(Exception):
    """
    Raised when waiting for a dataset exceeds the allowed time.

    The message is available as ``value`` and, because it is also passed to
    the base ``Exception``, as ``args[0]``.
    """

    def __init__(self, value):
        # Forward the message to Exception so ``args`` is populated; without
        # this, ``args`` is empty and the instance cannot be pickled/copied.
        super(DatasetTimeoutException, self).__init__(value)
        self.value = value

    def __str__(self):
        # Kept as the repr of the message for backward compatibility.
        return repr(self.value)