This document is for an in-development version of Galaxy. You can alternatively view this page in the latest release if it exists or view the top of the latest release's documentation.

Source code for galaxy.model.migrate.versions.0076_fix_form_values_data_corruption

This migration script fixes the data corruption caused in the form_values
table (content json field) by migrate script 65.

import logging
from json import dumps, loads

from sqlalchemy import MetaData

from galaxy.model.custom_types import _sniffnfix_pg9_hex

log = logging.getLogger(__name__)
metadata = MetaData()

[docs]def upgrade(migrate_engine): metadata.bind = migrate_engine print(__doc__) metadata.reflect() cmd = "SELECT form_values.id as id, form_values.content as field_values, form_definition.fields as fdfields " \ + " FROM form_definition, form_values " \ + " WHERE form_values.form_definition_id=form_definition.id " \ + " ORDER BY form_values.id" result = migrate_engine.execute(cmd) corrupted_rows = 0 for row in result: # first check if loading the dict from the json succeeds # if that fails, it means that the content field is corrupted. try: field_values_dict = loads(_sniffnfix_pg9_hex(str(row['field_values']))) except Exception: corrupted_rows = corrupted_rows + 1 # content field is corrupted fields_list = loads(_sniffnfix_pg9_hex(str(row['fdfields']))) field_values_str = _sniffnfix_pg9_hex(str(row['field_values'])) try: # Encoding errors? Just to be safe. print("Attempting to fix row %s" % row['id']) print("Prior to replacement: %s" % field_values_str) except Exception: pass field_values_dict = {} # look for each field name in the values and extract its value (string) for index in range(len(fields_list)): field = fields_list[index] field_name_key = '"%s": "' % field['name'] field_index = field_values_str.find(field_name_key) if field_index == -1: # if the field name is not present the field values dict then # inform the admin that this form values cannot be fixed print("The 'content' field of row 'id' %i does not have the field '%s' in the 'form_values' table and could not be fixed by this migration script." % (int(field['id']), field['name'])) else: # check if this is the last field if index == len(fields_list) - 1: # since this is the last field, the value string lies between the # field name and the '"}' string at the end, hence len(field_values_str) - 2 value = field_values_str[field_index + len(field_name_key):len(field_values_str) - 2] else: # if this is not the last field then the value string lies between # this field name and the next field name next_field = fields_list[index + 1] next_field_index = field_values_str.find('", "%s": "' % next_field['name']) value = field_values_str[field_index + len(field_name_key):next_field_index] # clean up the value string, escape the required quoutes and newline characters value = value.replace("'", "\''")\ .replace('"', '\\\\"')\ .replace('\r', "\\\\r")\ .replace('\n', "\\\\n")\ .replace('\t', "\\\\t") # add to the new values dict field_values_dict[field['name']] = value # update the db json_values = dumps(field_values_dict) cmd = "UPDATE form_values SET content='%s' WHERE id=%i" % (json_values, int(row['id'])) migrate_engine.execute(cmd) try: print("Post replacement: %s" % json_values) except Exception: pass if corrupted_rows: print('Fixed %i corrupted rows.' % corrupted_rows) else: print('No corrupted rows found.')
[docs]def downgrade(migrate_engine): pass