Source code for motif_conformations

"""Calculate the internal coordinates of each instance of a sequence motif in
protein structures

Author: Adriaan Lategan
"""

import argparse

from Bio.PDB.Polypeptide import PPBuilder

from internal_coord import InternalCoordinates
from pdb_io import PdbQueries, PdbReader, PdbQueryCsv, CsvWriter

HAS_HEADER = True


[docs]def get_arguments() -> argparse.Namespace: """Fetch command-line arguments Returns ------- argparse.Namespace """ argument_parser = argparse.ArgumentParser("Calculate the internal " "coordinates of each instance " "of a sequence motif in protein " "structures") argument_parser.add_argument('motif', type=str, help='the amino acid sequence to find and ' 'extract from the protein structures' ) argument_parser.add_argument('-q', '--query_list', type=str, help='path to the file specifying the names, ' 'file paths, and chain ids of the ' 'protein structures to search for the ' 'motif. If no query_list is specified,' 'it will read all files in ' 'structure_directory.' ) argument_parser.add_argument('structure_directory', type=str, help='path to the directory containing ' 'protein structure files' ) argument_parser.add_argument('structure_format', choices=['pdb', 'cif'], type=str, default='pdb', help='format of the protein structure files: ' 'either "pdb" or "cif"' ) argument_parser.add_argument('-g', '--gzipped', action='store_true', help='structure files are compressed using ' 'gzip' ) argument_parser.add_argument('output_file', type=str, help='file path for the output csv file' ) arguments = argument_parser.parse_args() return arguments
[docs]def motif_to_coordinates( motif: str, query_list: str, pdb_directory: str, pdb_format: str, gzipped: bool, output_file: str ) -> None: """ Read the protein structure files in the specified directory, search for the motif sequence in each structure and output the internal coordinates of the motifs as a csv file Parameters ---------- motif : str an amino acid sequence to find in the protein structures query_list : str a csv file specifying protein file paths and polymer instances pdb_directory : str a directory containing protein structure files pdb_format : str the format of the protein structure files. Either "pdb" or "cif" gzipped : bool true if the protein structure files are compressed with gzip, false if uncompressed output_file : str name of the file to which the internal coordinates are written """ has_header = HAS_HEADER pdb_reader = PdbReader(pdb_directory, pdb_format, gzipped) internal = InternalCoordinates() if query_list: chain_file = PdbQueryCsv(query_list, has_header) pdb_queries: PdbQueries = chain_file.read else: pdb_queries: PdbQueries = pdb_reader.directory_queries residue_data = (internal.get_coordinates(polypeptide.chain, residue) for query in pdb_queries for polypeptide in query.get_polypeptides(pdb_reader, PPBuilder()) for fragment in polypeptide.find_motif(motif) for residue in fragment ) fields = ['Protein', 'Model', 'Chain', 'Position', 'Residue Name', 'Coordinate Type', 'Coordinate ID', 'Coordinate Value' ] csv = CsvWriter(output_file, fields) csv.write_headings() for residue in residue_data: for coordinate in residue.coordinates: field_values = [residue.protein, f'{residue.model}', residue.chain, f'{residue.position}', residue.residue_name, coordinate.coordinate_type, coordinate.coordinate_id, f'{coordinate.coordinate_value:f}' ] csv.write_line(field_values) csv.close()
if __name__ == "__main__": arguments = get_arguments() motif_to_coordinates(arguments.motif, arguments.query_list, arguments.structure_directory, arguments.structure_format, arguments.gzipped, arguments.output_file )