libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
proteinpresenceabsencematrix.cpp
Go to the documentation of this file.
1/**
2 * \file protein/proteinpresenceabsencematrix.cpp
3 * \date 07/02/2024
4 * \author Olivier Langella
5 * \brief presence/absence matrix of amino acid code along the protein sequence
6 */
7
8/*******************************************************************************
9 * Copyright (c) 2024 Olivier Langella
10 *<Olivier.Langella@universite-paris-saclay.fr>.
11 *
12 * This file is part of PAPPSOms++.
13 *
14 * PAPPSOms++ is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU General Public License as published by
16 * the Free Software Foundation, either version 3 of the License, or
17 * (at your option) any later version.
18 *
19 * PAPPSOms++ is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU General Public License for more details.
23 *
24 * You should have received a copy of the GNU General Public License
25 * along with PAPPSOms++. If not, see <http://www.gnu.org/licenses/>.
26 *
27 ******************************************************************************/
28
30#include "proteinintegercode.h"
31
35
39
40
46
50{
51 m_presenceAbsenceMatrix = other.m_presenceAbsenceMatrix;
52
53 return *this;
54}
55
56
57const boost::numeric::ublas::matrix<
60{
61 return m_presenceAbsenceMatrix;
62}
63
64void
66 const pappso::ProteinIntegerCode &coded_protein,
67 const std::vector<uint32_t> &code_list_from_spectrum)
68{
69 const std::vector<std::uint8_t> &seq_aa_code = coded_protein.getSeqAaCode();
70
71 m_presenceAbsenceMatrix.resize(seq_aa_code.size(), 5);
72 std::vector<std::uint8_t>::const_iterator it_aa = seq_aa_code.begin();
73 auto it_couple = coded_protein.getPeptideCodedFragment(2).begin();
74 auto it_trio = coded_protein.getPeptideCodedFragment(3).begin();
75 auto it_quatro = coded_protein.getPeptideCodedFragment(4).begin();
76 auto it_cinqo = coded_protein.getPeptideCodedFragment(5).begin();
77
78 for(std::size_t i = 0; i < seq_aa_code.size(); i++)
79 {
80 if(std::binary_search(code_list_from_spectrum.begin(),
81 code_list_from_spectrum.end(),
82 (std::uint32_t)(*it_aa)))
83 {
84 // presence
85 m_presenceAbsenceMatrix(i, 0) =
86 ProteinPresenceAbsenceMatrixElement::present;
87 }
88 else
89 {
90 m_presenceAbsenceMatrix(i, 0) =
91 ProteinPresenceAbsenceMatrixElement::absent;
92 }
93 if(std::binary_search(code_list_from_spectrum.begin(),
94 code_list_from_spectrum.end(),
95 *it_couple))
96 {
97 // presence
98 m_presenceAbsenceMatrix(i, 1) =
99 ProteinPresenceAbsenceMatrixElement::present;
100 }
101 else
102 {
103 m_presenceAbsenceMatrix(i, 1) =
104 ProteinPresenceAbsenceMatrixElement::absent;
105 }
106 if(std::binary_search(code_list_from_spectrum.begin(),
107 code_list_from_spectrum.end(),
108 *it_trio))
109 {
110 // presence
111 m_presenceAbsenceMatrix(i, 2) =
112 ProteinPresenceAbsenceMatrixElement::present;
113 }
114 else
115 {
116 m_presenceAbsenceMatrix(i, 2) =
117 ProteinPresenceAbsenceMatrixElement::absent;
118 }
119 if(std::binary_search(code_list_from_spectrum.begin(),
120 code_list_from_spectrum.end(),
121 *it_quatro))
122 {
123 // presence
124 m_presenceAbsenceMatrix(i, 3) =
125 ProteinPresenceAbsenceMatrixElement::present;
126 }
127 else
128 {
129 m_presenceAbsenceMatrix(i, 3) =
130 ProteinPresenceAbsenceMatrixElement::absent;
131 }
132 if(std::binary_search(code_list_from_spectrum.begin(),
133 code_list_from_spectrum.end(),
134 *it_cinqo))
135 {
136 // presence
137 m_presenceAbsenceMatrix(i, 4) =
138 ProteinPresenceAbsenceMatrixElement::present;
139 }
140 else
141 {
142 m_presenceAbsenceMatrix(i, 4) =
143 ProteinPresenceAbsenceMatrixElement::absent;
144 }
145 it_aa++;
146 it_couple++;
147 it_trio++;
148 it_quatro++;
149 it_cinqo++;
150 }
151}
152
153
154std::vector<double>
156{
157
158 std::vector<double> convolution_score;
159
160 for(std::size_t ipos = 0; ipos < m_presenceAbsenceMatrix.size1(); ipos++)
161 {
162 convolution_score.push_back(convolutionKernel(ipos));
163 }
164
165 return convolution_score;
166}
167
168double
170 std::size_t seq_position, std::size_t aa_fragment_size) const
171{
172 if(m_presenceAbsenceMatrix(seq_position, aa_fragment_size) ==
173 ProteinPresenceAbsenceMatrixElement::present)
174 {
175 switch(aa_fragment_size)
176 {
177 case 0:
178 return 1;
179 case 1:
180 return 3;
181 case 2:
182 return 6;
183 case 3:
184 return 8;
185 case 4:
186 return 10;
187 default:
188 break;
189 }
190 }
191 else
192 {
193 // absent
194 switch(aa_fragment_size)
195 {
196 case 0:
197 return 0.1;
198 case 1:
199 return 0.3;
200 case 2:
201 return 0.6;
202 case 3:
203 return 0.8;
204 case 4:
205 return 1;
206 default:
207 break;
208 }
209 }
210 return 0.1;
211}
212
213double
215 std::size_t position) const
216{
217 double score = 0;
218
219 std::size_t size_seq = m_presenceAbsenceMatrix.size1();
220 // single :
221 double single_score = 0;
222 std::size_t endpos = std::min(position + 5, size_seq);
223 for(std::size_t ipos = position; ipos < endpos; ipos++)
224 {
225 single_score += getScore(ipos, 0);
226 }
227
228 // duo
229 double duo_score = 0;
230 endpos = std::min(position + 4, size_seq);
231 for(std::size_t ipos = position; ipos < endpos; ipos++)
232 {
233 duo_score += getScore(ipos, 1);
234 }
235
236
237 // trio
238 double trio_score = 0;
239 endpos = std::min(position + 3, size_seq);
240 for(std::size_t ipos = position; ipos < endpos; ipos++)
241 {
242 trio_score += getScore(ipos, 2);
243 }
244
245
246 // quatro
247 double quatro_score = 0;
248 endpos = std::min(position + 2, size_seq);
249 for(std::size_t ipos = position; ipos < endpos; ipos++)
250 {
251 quatro_score += getScore(ipos, 3);
252 }
253
254 // cinqo
255 score += getScore(position, 4);
256 return score * single_score * duo_score * trio_score * quatro_score;
257}
const std::vector< std::uint32_t > & getPeptideCodedFragment(std::size_t size) const
const std::vector< std::uint8_t > & getSeqAaCode() const
std::vector< double > convolution() const
process convolution of spectrum code list along protein sequence
double getScore(std::size_t seq_position, std::size_t aa_fragment_size) const
ProteinPresenceAbsenceMatrix & operator=(const ProteinPresenceAbsenceMatrix &other)
double convolutionKernel(std::size_t position) const
void fillMatrix(const pappso::ProteinIntegerCode &coded_protein, const std::vector< uint32_t > &code_list_from_spectrum)
boost::numeric::ublas::matrix< ProteinPresenceAbsenceMatrixElement > m_presenceAbsenceMatrix
const boost::numeric::ublas::matrix< pappso::ProteinPresenceAbsenceMatrix::ProteinPresenceAbsenceMatrixElement > & getPresenceAbsenceMatrix() const
transform protein amino acid sequence into vectors of amino acid codes
presence/absence matrix of amino acid code along the protein sequence