Warning
This document is for an in-development version of Galaxy. You can alternatively view this page in the latest release if it exists or view the top of the latest release's documentation.
Source code for galaxy.model.migrate.versions.0067_populate_sequencer_table
"""
Migration script to populate the 'sequencer' table and it is populated using unique
entries in the 'datatx_info' column in the 'request_type' table. It also deletes the 'datatx_info'
column in the 'request_type' table and adds a foreign key to the 'sequencer' table. The
actual contents of the datatx_info column are stored as form_values.
"""
from __future__ import print_function
import logging
from json import dumps, loads
from sqlalchemy import (
Column,
ForeignKey,
Integer,
MetaData,
Table
)
from galaxy.model.custom_types import JSONType
from galaxy.model.migrate.versions.util import (
add_column,
drop_column,
engine_false,
localtimestamp,
nextval
)
log = logging.getLogger(__name__)
metadata = MetaData()
[docs]def get_latest_id(migrate_engine, table):
result = migrate_engine.execute("select id from %s order by id desc" % table)
row = result.fetchone()
if row:
return row[0]
else:
raise Exception('Unable to get the latest id in the %s table.' % table)
[docs]def create_sequencer_form_definition(migrate_engine):
'''
Create a new form_definition containing 5 fields (host, username, password,
data_dir & rename_datasets) which described the existing datatx_info json
dict in the request_type table
'''
# create new form_definition_current in the db
cmd = "INSERT INTO form_definition_current VALUES ( %s, %s, %s, %s, %s )" % (
nextval(migrate_engine, 'form_definition_current'),
localtimestamp(migrate_engine),
localtimestamp(migrate_engine),
'NULL',
engine_false(migrate_engine))
migrate_engine.execute(cmd)
# get this form_definition_current id
form_definition_current_id = get_latest_id(migrate_engine, 'form_definition_current')
# create new form_definition in the db
form_definition_name = 'Generic sequencer form'
form_definition_desc = ''
form_definition_fields = []
fields = [('Host', 'TextField'),
('User name', 'TextField'),
('Password', 'PasswordField'),
('Data directory', 'TextField')]
for index, (label, field_type) in enumerate(fields):
form_definition_fields.append({'name': 'field_%i' % index,
'label': label,
'helptext': '',
'visible': True,
'required': False,
'type': field_type,
'selectlist': [],
'layout': 'none',
'default': ''})
form_definition_fields.append({'name': 'field_%i' % len(fields),
'label': 'Prepend the experiment name and sample name to the dataset name?',
'helptext': 'Galaxy datasets are renamed by prepending the experiment name and sample name to the dataset name, ensuring dataset names remain unique in Galaxy even when multiple datasets have the same name on the sequencer.',
'visible': True,
'required': False,
'type': 'SelectField',
'selectlist': ['Do not rename',
'Preprend sample name',
'Prepend experiment name',
'Prepend experiment and sample name'],
'layout': 'none',
'default': ''})
form_definition_type = 'Sequencer Information Form'
form_definition_layout = dumps('[]')
cmd = "INSERT INTO form_definition VALUES ( %s, %s, %s, '%s', '%s', %s, '%s', '%s', '%s' )"
cmd = cmd % (nextval(migrate_engine, 'form_definition'),
localtimestamp(migrate_engine),
localtimestamp(migrate_engine),
form_definition_name,
form_definition_desc,
form_definition_current_id,
dumps(form_definition_fields),
form_definition_type,
form_definition_layout)
migrate_engine.execute(cmd)
# get this form_definition id
form_definition_id = get_latest_id(migrate_engine, 'form_definition')
# update the form_definition_id column in form_definition_current
cmd = "UPDATE form_definition_current SET latest_form_id=%i WHERE id=%i" % (form_definition_id, form_definition_current_id)
migrate_engine.execute(cmd)
return form_definition_id
[docs]def get_sequencer_id(migrate_engine, sequencer_info):
'''Get the sequencer id corresponding to the sequencer information'''
# Check if there is any existing sequencer which have the same sequencer
# information fields & values
cmd = "SELECT sequencer.id, form_values.content FROM sequencer, form_values WHERE sequencer.form_values_id=form_values.id"
result = migrate_engine.execute(cmd)
for row in result:
sequencer_id = row[0]
values = str(row[1])
if not values.strip():
continue
values = loads(values)
# proceed only if sequencer_info is a valid list
if values and isinstance(values, dict):
if sequencer_info.get('host', '') == values.get('field_0', '') \
and sequencer_info.get('username', '') == values.get('field_1', '') \
and sequencer_info.get('password', '') == values.get('field_2', '') \
and sequencer_info.get('data_dir', '') == values.get('field_3', '') \
and sequencer_info.get('rename_dataset', '') == values.get('field_4', ''):
return sequencer_id
return None
[docs]def add_sequencer(migrate_engine, sequencer_index, sequencer_form_definition_id, sequencer_info):
'''Adds a new sequencer to the sequencer table along with its form values.'''
# Create a new form values record with the supplied sequencer information
values = dumps({'field_0': sequencer_info.get('host', ''),
'field_1': sequencer_info.get('username', ''),
'field_2': sequencer_info.get('password', ''),
'field_3': sequencer_info.get('data_dir', ''),
'field_4': sequencer_info.get('rename_dataset', '')})
cmd = "INSERT INTO form_values VALUES ( %s, %s, %s, %s, '%s' )" % (nextval(migrate_engine, 'form_values'),
localtimestamp(migrate_engine),
localtimestamp(migrate_engine),
sequencer_form_definition_id,
values)
migrate_engine.execute(cmd)
sequencer_form_values_id = get_latest_id(migrate_engine, 'form_values')
# Create a new sequencer record with reference to the form value created above.
name = 'Sequencer_%i' % sequencer_index
desc = ''
version = ''
sequencer_type_id = 'simple_unknown_sequencer'
cmd = "INSERT INTO sequencer VALUES ( %s, %s, %s, '%s', '%s', '%s', '%s', %s, %s, %s )" % (
nextval(migrate_engine, 'sequencer'),
localtimestamp(migrate_engine),
localtimestamp(migrate_engine),
name,
desc,
sequencer_type_id,
version,
sequencer_form_definition_id,
sequencer_form_values_id,
engine_false(migrate_engine))
migrate_engine.execute(cmd)
return get_latest_id(migrate_engine, 'sequencer')
[docs]def update_sequencer_id_in_request_type(migrate_engine, request_type_id, sequencer_id):
'''Update the foreign key to the sequencer table in the request_type table'''
cmd = "UPDATE request_type SET sequencer_id=%i WHERE id=%i" % (sequencer_id, request_type_id)
migrate_engine.execute(cmd)
[docs]def upgrade(migrate_engine):
print(__doc__)
metadata.bind = migrate_engine
metadata.reflect()
RequestType_table = Table("request_type", metadata, autoload=True)
# create foreign key field to the sequencer table in the request_type table
col = Column("sequencer_id", Integer, ForeignKey("sequencer.id"), nullable=True)
add_column(col, RequestType_table, metadata)
# copy the sequencer information contained in the 'datatx_info' column
# of the request_type table to the form values referenced in the sequencer table
cmd = "SELECT id, name, datatx_info FROM request_type ORDER BY id ASC"
result = migrate_engine.execute(cmd)
results_list = result.fetchall()
# Proceed only if request_types exists
if len(results_list):
# In this migration script the all the contents of the datatx_info are stored as form_values
# with a pointer to the sequencer table. This way the sequencer information can be customized
# by the admin and is no longer restricted to host, username, password, data directory.
# For the existing request_types in the database, we add a new form_definition
# with these 4 fields. Then we populate the sequencer table with unique datatx_info
# column from the existing request_types.
sequencer_form_definition_id = create_sequencer_form_definition(migrate_engine)
sequencer_index = 1
for row in results_list:
request_type_id = row[0]
sequencer_info = str(row[2]) # datatx_info column
# skip if sequencer_info is empty
if not sequencer_info.strip() or sequencer_info in ['None', 'null']:
continue
sequencer_info = loads(sequencer_info.strip())
# proceed only if sequencer_info is a valid dict
if sequencer_info and isinstance(sequencer_info, dict):
# check if this sequencer has already been added to the sequencer table
sequencer_id = get_sequencer_id(migrate_engine, sequencer_info)
if not sequencer_id:
# add to the sequencer table
sequencer_id = add_sequencer(migrate_engine, sequencer_index, sequencer_form_definition_id, sequencer_info)
# now update the sequencer_id column in request_type table
update_sequencer_id_in_request_type(migrate_engine, request_type_id, sequencer_id)
sequencer_index = sequencer_index + 1
# Finally delete the 'datatx_info' column from the request_type table
drop_column('datatx_info', RequestType_table)
[docs]def downgrade(migrate_engine):
metadata.bind = migrate_engine
metadata.reflect()
RequestType_table = Table("request_type", metadata, autoload=True)
# create the 'datatx_info' column
col = Column("datatx_info", JSONType())
add_column(col, RequestType_table, metadata)
# restore the datatx_info column data in the request_type table with data from
# the sequencer and the form_values table
cmd = "SELECT request_type.id, form_values.content "\
+ " FROM request_type, sequencer, form_values "\
+ " WHERE request_type.sequencer_id=sequencer.id AND sequencer.form_values_id=form_values.id "\
+ " ORDER BY request_type.id ASC"
result = migrate_engine.execute(cmd)
for row in result:
request_type_id = row[0]
seq_values = loads(str(row[1]))
# create the datatx_info json dict
datatx_info = dumps(dict(host=seq_values.get('field_0', ''),
username=seq_values.get('field_1', ''),
password=seq_values.get('field_2', ''),
data_dir=seq_values.get('field_3', ''),
rename_dataset=seq_values.get('field_4', '')))
# update the column
cmd = "UPDATE request_type SET datatx_info='%s' WHERE id=%i" % (datatx_info, request_type_id)
migrate_engine.execute(cmd)
# delete foreign key field to the sequencer table in the request_type table
drop_column('sequencer_id', RequestType_table)