Source code for alphastats.loader.DIANNLoader

from alphastats.loader.BaseLoader import BaseLoader
import pandas as pd


class DIANNLoader(BaseLoader):
    """Loader for DIA-NN output files.

    https://github.com/vdemichev/DiaNN
    """

    # Columns that are NOT per-sample intensity columns and therefore must
    # not receive the "_Intensity" suffix. Every column name absent from this
    # set is treated as a sample column.
    # TODO this is very fragile as changes in column names can break this
    _NO_SAMPLE_COLUMNS = frozenset(
        {
            "PG.Q.value",
            "Global.PG.Q.value",
            "PTM.Q.value",
            "PTM.Site.Confidence",
            "PG.Quantity",
            "Protein.Group",
            "Protein.Ids",
            "Protein.Names",
            "Genes",
            "First.Protein.Description",
            "contamination_library",
        }
    )

    def __init__(
        self,
        file,
        intensity_column="[sample]",
        index_column="Protein.Group",
        sep="\t",
        **kwargs
    ):
        r"""Import DIA-NN output data report.pg_matrix.tsv

        Args:
            file (_type_): DIA-NN output file report.pg_matrix.tsv
            intensity_column (str, optional): columns containing the intensity
                column for each experiment. Defaults to "[sample]". After
                initialisation this attribute is overwritten with
                "[sample]_Intensity" (see ``_add_tag_to_sample_columns``).
            index_column (str, optional): column with the Protein IDs.
                Defaults to "Protein.Group".
            sep (str, optional): file separation of the input file.
                Defaults to "\t".
            **kwargs: accepted for call compatibility; not used by this class
                and not forwarded to ``BaseLoader.__init__``.
        """
        # NOTE(review): BaseLoader.__init__ is expected to populate
        # ``self.rawdata`` (it is read below) -- confirm against BaseLoader.
        super().__init__(file, intensity_column, index_column, sep)
        self.software = "DIA-NN"
        # Order matters: sample columns are tagged before the contamination
        # column is added (``_add_contamination_column`` is not defined in
        # this class; presumably inherited from BaseLoader).
        self._add_tag_to_sample_columns()
        self._add_contamination_column()

    def _add_tag_to_sample_columns(self):
        """Append "_Intensity" to every sample column of ``self.rawdata``.

        When creating the matrix, sample columns won't be found if the
        intensity column is only specified as "[sample]", so each column whose
        name is not in ``_NO_SAMPLE_COLUMNS`` is renamed to
        ``"<name>_Intensity"`` and ``self.intensity_column`` is set to the
        matching pattern "[sample]_Intensity".

        All resulting column labels are converted to ``str``. The method is
        not idempotent: calling it again would append the suffix a second time.
        """
        non_sample = self._NO_SAMPLE_COLUMNS
        self.rawdata.columns = [
            str(col) if col in non_sample else str(col) + "_Intensity"
            for col in self.rawdata.columns
        ]
        self.intensity_column = "[sample]_Intensity"