OpenMS
Loading...
Searching...
No Matches
FeatureLinkerBase.cpp
Go to the documentation of this file.
1// Copyright (c) 2002-present, The OpenMS Team -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: Timo Sachsenberg $
6// $Authors: Marc Sturm, Clemens Groepl, Steffen Sass $
7// --------------------------------------------------------------------------
8
9#include <OpenMS/FORMAT/FileHandler.h>
10#include <OpenMS/FORMAT/FileTypes.h>
11#include <OpenMS/ANALYSIS/MAPMATCHING/FeatureGroupingAlgorithm.h>
12#include <OpenMS/DATASTRUCTURES/ListUtils.h>
13#include <OpenMS/CONCEPT/ProgressLogger.h>
14#include <OpenMS/METADATA/ExperimentalDesign.h>
15#include <OpenMS/FORMAT/ExperimentalDesignFile.h>
16
17#include <OpenMS/KERNEL/ConversionHelper.h>
18
19#include <OpenMS/APPLICATIONS/TOPPBase.h>
20
21#include <iomanip> // setw
22
23using namespace OpenMS;
24using namespace std;
25
26//-------------------------------------------------------------
27//Doxygen docu
28//-------------------------------------------------------------
29
36
37// We do not want this class to show up in the docu:
39
40class TOPPFeatureLinkerBase :
41 public TOPPBase,
42 public ProgressLogger
43{
44
45public:
46 TOPPFeatureLinkerBase(String name, String description, bool official = true) :
47 TOPPBase(name, description, official)
48 {
49 }
50
51protected:
52 void registerOptionsAndFlags_() override // only for "unlabeled" algorithms!
53 {
54 registerInputFileList_("in", "<files>", ListUtils::create<String>(""), "input files separated by blanks", true);
55 setValidFormats_("in", ListUtils::create<String>("featureXML,consensusXML"));
56 registerOutputFile_("out", "<file>", "", "Output file", true);
57 setValidFormats_("out", ListUtils::create<String>("consensusXML"));
58 registerInputFile_("design", "<file>", "", "input file containing the experimental design", false);
59 setValidFormats_("design", ListUtils::create<String>("tsv"));
60 addEmptyLine_();
61 registerFlag_("keep_subelements", "For consensusXML input only: If set, the sub-features of the inputs are transferred to the output.");
62 }
63
64 ExitCodes common_main_(FeatureGroupingAlgorithm * algorithm,
65 bool labeled = false)
66 {
67 //-------------------------------------------------------------
68 // parameter handling
69 //-------------------------------------------------------------
70 StringList ins;
71 if (labeled)
72 {
73 ins.push_back(getStringOption_("in"));
74 }
75 else
76 {
77 ins = getStringList_("in");
78 }
79 String out = getStringOption_("out");
80
81 //-------------------------------------------------------------
82 // check for valid input
83 //-------------------------------------------------------------
84 // check if all input files have the correct type
85 FileTypes::Type file_type = FileHandler::getType(ins[0]);
86 for (Size i = 0; i < ins.size(); ++i)
87 {
88 if (FileHandler::getType(ins[i]) != file_type)
89 {
90 writeLogError_("Error: All input files must be of the same type!");
91 return ILLEGAL_PARAMETERS;
92 }
93 }
94
95 //-------------------------------------------------------------
96 // set up algorithm
97 //-------------------------------------------------------------
98 Param algorithm_param = getParam_().copy("algorithm:", true);
99 writeDebug_("Used algorithm parameters", algorithm_param, 3);
100 algorithm->setParameters(algorithm_param);
101
102 //-------------------------------------------------------------
103 // perform grouping
104 //-------------------------------------------------------------
105 // load input
106 ConsensusMap out_map;
107 StringList ms_run_locations;
108
109 String design_file;
110
111 // TODO: support design in labeled feature linker
112 if (!labeled)
113 {
114 design_file = getStringOption_("design");
115 }
116
117 if (file_type == FileTypes::CONSENSUSXML && !design_file.empty())
118 {
119 writeLogError_("Error: Using fractionated design with consensusXML als input is not supported!");
120 return ILLEGAL_PARAMETERS;
121 }
122
123 if (file_type == FileTypes::FEATUREXML)
124 {
125 OPENMS_LOG_INFO << "Linking " << ins.size() << " featureXMLs." << endl;
126
127 //-------------------------------------------------------------
128 // Extract (optional) fraction identifiers and associate with featureXMLs
129 //-------------------------------------------------------------
130
131 // determine map of fractions to MS files
132 map<unsigned, vector<String>> frac2files;
133
134 if (!design_file.empty())
135 {
136 // parse design file and determine fractions
137 ExperimentalDesign ed = ExperimentalDesignFile::load(design_file, false);
138
139 // determine if design defines more than one fraction
140 frac2files = ed.getFractionToMSFilesMapping();
141
142 writeDebug_(String("Grouping ") + String(ed.getNumberOfFractions()) + " fractions.", 3);
143
144 // check if all fractions have the same number of MS runs associated
145 if (!ed.sameNrOfMSFilesPerFraction())
146 {
147 writeLogError_("Error: Number of runs must match for every fraction!");
148 return ILLEGAL_PARAMETERS;
149 }
150 }
151 else // no design file given
152 {
153 for (Size i = 0; i != ins.size(); ++i)
154 {
155 frac2files[1].emplace_back(String("file") + String(i)); // associate each run with fraction 1
156 }
157 }
158
159 vector<FeatureMap > maps(ins.size());
160 FileHandler f;
161 FeatureFileOptions param = f.getFeatOptions();
162
163 // to save memory don't load convex hulls and subordinates
164 param.setLoadSubordinates(false);
165 param.setLoadConvexHull(false);
166 f.setFeatOptions(param);
167
168 Size progress = 0;
169 setLogType(ProgressLogger::CMD);
170 startProgress(0, ins.size(), "reading input");
171 for (Size i = 0; i < ins.size(); ++i)
172 {
173 FeatureMap tmp;
174 f.loadFeatures(ins[i], tmp, {FileTypes::FEATUREXML});
175
176 StringList ms_runs;
177 tmp.getPrimaryMSRunPath(ms_runs);
178
179 // associate mzML file with map i in consensusXML
180 if (ms_runs.size() > 1 || ms_runs.empty())
181 {
182 OPENMS_LOG_WARN << "Exactly one MS run should be associated with a FeatureMap. "
183 << ms_runs.size()
184 << " provided." << endl;
185 }
186 else
187 {
188 out_map.getColumnHeaders()[i].filename = ms_runs.front();
189 }
190 out_map.getColumnHeaders()[i].size = tmp.size();
191 out_map.getColumnHeaders()[i].unique_id = tmp.getUniqueId();
192
193 // copy over information on the primary MS run
194 ms_run_locations.insert(ms_run_locations.end(), ms_runs.begin(), ms_runs.end());
195
196 // to save memory, remove convex hulls, subordinates:
197 for (Feature& ft : tmp)
198 {
199 String adduct;
200 String group;
201 //exception: addduct information
202 if (ft.metaValueExists(Constants::UserParam::DC_CHARGE_ADDUCTS))
203 {
204 adduct = ft.getMetaValue(Constants::UserParam::DC_CHARGE_ADDUCTS);
205 }
206 if (ft.metaValueExists(Constants::UserParam::ADDUCT_GROUP))
207 {
208 group = ft.getMetaValue(Constants::UserParam::ADDUCT_GROUP);
209 }
210 ft.getSubordinates().clear();
211 ft.getConvexHulls().clear();
212 ft.clearMetaInfo();
213 if (!adduct.empty())
214 {
215 ft.setMetaValue(Constants::UserParam::DC_CHARGE_ADDUCTS, adduct);
216 }
217 if (!group.empty())
218 {
219 ft.setMetaValue("Group", group);
220 }
221
222 }
223
224 maps[i] = tmp;
225 maps[i].updateRanges();
226
227 setProgress(progress++);
228 }
229 endProgress();
230
231 // exception for "labeled" algorithms: copy file descriptions
232 if (labeled)
233 {
234 out_map.getColumnHeaders()[1] = out_map.getColumnHeaders()[0];
235 out_map.getColumnHeaders()[0].label = "light";
236 out_map.getColumnHeaders()[1].label = "heavy";
237 ms_run_locations.push_back(ms_run_locations[0]);
238 }
239
241 // invoke feature grouping algorithm
242
243 if (frac2files.size() == 1) // group one fraction
244 {
245 algorithm->group(maps, out_map);
246 }
247 else // group multiple fractions
248 {
249 writeDebug_(String("Stored in ") + String(maps.size()) + " maps.", 3);
250 for (Size i = 1; i <= frac2files.size(); ++i)
251 {
252 vector<FeatureMap> fraction_maps;
253 // TODO FRACTIONS: here we assume that the order of featureXML is from fraction 1..n
254 // we should check if these are shuffled and error / warn
255 for (size_t feature_map_index = 0; feature_map_index != frac2files[i].size(); ++feature_map_index)
256 {
257 fraction_maps.push_back(maps[feature_map_index]);
258 }
259 algorithm->group(fraction_maps, out_map);
260 }
261 }
262 }
263 else
264 {
265 //TODO isn't it better to have this option/functionality in the FeatureGroupingAlgorithm class?
266 // Otherwise everyone has to remember e.g. to annotate the old map_index etc.
267 bool keep_subelements = getFlag_("keep_subelements");
268 vector<ConsensusMap> maps(ins.size());
269 FileHandler f;
270 for (Size i = 0; i < ins.size(); ++i)
271 {
272 f.loadConsensusFeatures(ins[i], maps[i], {FileTypes::CONSENSUSXML});
273 maps[i].updateRanges();
274 // copy over information on the primary MS run
275 StringList ms_runs;
276 maps[i].getPrimaryMSRunPath(ms_runs);
277 ms_run_locations.insert(ms_run_locations.end(), ms_runs.begin(), ms_runs.end());
278 if (keep_subelements)
279 {
280 auto saveOldMapIndex =
281 [](PeptideIdentification &p)
282 {
283 if (p.metaValueExists("map_index"))
284 {
285 p.setMetaValue("old_map_index", p.getMetaValue("map_index"));
286 }
287 else
288 {
289 OPENMS_LOG_WARN << "Warning: map_index not found in PeptideID. The tool will not be able to assign a"
290 "consistent one. Check the settings of previous tools." << std::endl;
291 }
292 };
293 maps[i].applyFunctionOnPeptideIDs(saveOldMapIndex, true);
294 }
295 }
296 // group
297 algorithm->group(maps, out_map);
298
299 // set file descriptions:
300
301 if (!keep_subelements)
302 {
303 for (Size i = 0; i < ins.size(); ++i)
304 {
305 out_map.getColumnHeaders()[i].filename = ins[i];
306 out_map.getColumnHeaders()[i].size = maps[i].size();
307 out_map.getColumnHeaders()[i].unique_id = maps[i].getUniqueId();
308 }
309 }
310 else
311 {
312 // components of the output map are not the input maps themselves, but
313 // the components of the input maps:
314 algorithm->transferSubelements(maps, out_map);
315 }
316 }
317
318 // assign unique ids
319 out_map.applyMemberFunction(&UniqueIdInterface::setUniqueId);
320
321 // annotate output with data processing info
322 addDataProcessing_(out_map,
323 getProcessingInfo_(DataProcessing::FEATURE_GROUPING));
324
325
326 // sort list of peptide identifications in each consensus feature by map index
327 out_map.sortPeptideIdentificationsByMapIndex();
328
329 // write output
330 FileHandler().storeConsensusFeatures(out, out_map, {FileTypes::CONSENSUSXML});
331
332 // some statistics
333 map<Size, UInt> num_consfeat_of_size;
334 for (const ConsensusFeature& cf : out_map)
335 {
336 ++num_consfeat_of_size[cf.size()];
337 }
338
339 OPENMS_LOG_INFO << "Number of consensus features:" << endl;
340 for (map<Size, UInt>::reverse_iterator i = num_consfeat_of_size.rbegin();
341 i != num_consfeat_of_size.rend(); ++i)
342 {
343 OPENMS_LOG_INFO << " of size " << setw(2) << i->first << ": " << setw(6)
344 << i->second << endl;
345 }
346 OPENMS_LOG_INFO << " total: " << setw(6) << out_map.size() << endl;
347
348 return EXECUTION_OK;
349 }
350
351};
352
Definition FLASHDeconvWizardBase.cpp:26