################################################################## # author & copyright: envibee , all rights reserved ############## # date: 12-09-2022 ############################################### # contact: mloos@envibee.ch ###################################### ################################################################## ################################################################## # debugs & parameter/workflow checks & init output ############### if(FALSE){ # -> test/debug purpose only # assign("profileList", profileList_pos, envir = as.environment(".GlobalEnv")) MS2_masses <<- MS2_masses MS2_setup <<- MS2_setup MS2_max_peaks <<- MS2_max_peaks search_targetsuspects <<- TRUE search_insourcefrag <<- FALSE search_homologues <<- FALSE search_MS2masses <<- TRUE in_mode <<- "positive" #search_targetsuspects <- TRUE #search_insourcefrag <- TRUE #search_homologues <- TRUE #search_MS2masses <- TRUE stop("\nSTART DEBUGGING_1") } if(logfile$version < 4.327) stop("This PFAS comparison script must be run with enviMass versions >= 4.327. Please update enviMass!") if(any(is.na(match("in_mode", objects())))) stop("Mode unknown in script in_source_fragments") # defined by enviMass if(any(is.na( match(c("search_targetsuspects", "search_insourcefrag", "search_homologues", "search_MS2masses"), objects()) ))) stop("Missing parameter in script PfasProfileFilter - please check whether all parameters (search_targetsuspects, search_insourcefrag, search_homologues, search_MS2masses) were defined in the Comparison which sources this script!") if(search_MS2masses == TRUE) if(any(is.na( match(c("MS2_masses", "MS2_setup", "MS2_max_peaks"), objects()) ))) stop("Missing parameter in script PfasProfileFilter - please check whether all parameters MS2 parameters (MS2_masses, MS2_setup, MS2_max_peaks) were defined in the Comparison which sources this script!") if(search_targetsuspects){ if(logfile$workflow[["target_screen"]] == "no") stop("Problem in script PfasProfileFilter for search_targetsuspects <- TRUE: target screening is not enabled.") if(logfile$workflow[["components_profiles"]] == "no") stop("Problem in script PfasProfileFilter for search_targetsuspects <- TRUE: profile componentization is not enabled.") targets <- read.table(file = file.path(logfile[[1]], "dataframes", "targets.txt"), header = TRUE, sep = "\t", colClasses = "character") #if(sum(grepl("pfas", targets$tag1, fixed = TRUE)) == 0) stop("Problem in script PfasProfileFilter for search_targetsuspects <- TRUE: none of your target compounds are marked with substring pfas for their tag1") } if(search_insourcefrag){ if(logfile$workflow[["target_screen"]] == "no") stop("Problem in script PfasProfileFilter for search_insourcefrag <- TRUE: target screening is not enabled.") if(logfile$workflow[["components_profiles"]] == "no") stop("Problem in script PfasProfileFilter for search_insourcefrag <- TRUE: profile componentization is not enabled.") targets <- read.table(file = file.path(logfile[[1]], "dataframes", "targets.txt"), header = TRUE, sep = "\t", colClasses = "character") if(sum(grepl("fragment", targets$tag1, fixed = TRUE)) == 0) stop("Problem in script PfasProfileFilter for search_insourcefrag <- TRUE: none of your target compounds are marked with substring fragment for their tag1") } if(search_homologues){ if(logfile$workflow[["homologues"]] == "no") stop("Problem in script PfasProfileFilter for search_homologues <- TRUE: target screening is not enabled.") if(logfile$workflow[["components_profiles"]] == "no") stop("Problem in script PfasProfileFilter for search_homologues <- TRUE: profile componentization is not enabled.") if(in_mode == "positive") profileList_PLP_path <- file.path(as.character(logfile[[1]]), "results", "profileList_pos_plp.msraw") if(in_mode == "negative") profileList_PLP_path <- file.path(as.character(logfile[[1]]), "results", "profileList_neg_plp.msraw") } if(search_targetsuspects | search_insourcefrag | search_homologues){ if(in_mode == "positive"){ if(!file.exists(file.path(logfile[[1]], "results", "links_profiles_pos"))) stop("Problem in script PfasProfileFilter: links_profiles_pos not found - has the profile componentization and the target screening been enabled n the Workflow options? Please revise") load(file.path(as.character(logfile[[1]]), "results", "links_profiles_pos"), envir = as.environment(".GlobalEnv")) # each entry with 6 lists itself: targets, IS, EIC_correl, isotop, adducts, homol assign("links_profiles", links_profiles_pos, envir = as.environment(".GlobalEnv")) rm(links_profiles_pos, envir = as.environment(".GlobalEnv")) } if(in_mode == "negative"){ if(!file.exists(file.path(logfile[[1]], "results", "links_profiles_neg"))) stop("Problem in script PfasProfileFilter: links_profiles_neg not found - has the profile componentization and the target screening been enabled n the Workflow options? Please revise") load(file.path(as.character(logfile[[1]]), "results", "links_profiles_neg"), envir = as.environment(".GlobalEnv")) # each entry with 6 lists itself: targets, IS, EIC_correl, isotop, adducts, homol assign("links_profiles", links_profiles_neg, envir = as.environment(".GlobalEnv")) rm(links_profiles_neg, envir = as.environment(".GlobalEnv")) } } if(search_MS2masses){ if(!(MS2_setup %in% c("DDA", "DIA"))) stop("Invalid MS2_setup parameter. Must be either DDA or DIA, and in quotes.") if(is.character(logfile$method_setup)) stop("\nProblem in script PfasProfileFilter for search_MS2masses <- TRUE: no method setup found in logfile - this setup must be included to analyse any MS2 data.") if(!length(MS2_masses)) stop("\nProblem in script PfasProfileFilter for search_MS2masses <- TRUE: no MS2_masses defined.") } outcome <- rep(1111, nrow(profileList$index_prof)) ################################################################## ################################################################## # on search_targetsuspects ####################################### if(search_targetsuspects){ cat("\n doing search_targetsuspects ...") ############################################################## targets <- read.table(file = file.path(logfile[[1]], "dataframes", "targets.txt"), header = TRUE, sep = "\t", colClasses = "character") targets <- targets[!grepl("fragment", targets$tag1, fixed = TRUE),, drop = FALSE] x <- profileList$index_prof[, "links"] target_match <- sapply( x, function(x, targets, links_profiles){ if(x != 0){ if(length(links_profiles[[x]]$targ)){ if(dim(links_profiles[[x]]$targ)[1]){ z <- strsplit(links_profiles[[x]]$targ[,1], "_") ID_tar <- sapply(z, function(z) z[[1]]) add_tar <- sapply(z, function(z) z[[2]]) score_tar <- links_profiles[[x]]$targ[,3] Name_tar <- targets$Name[match(ID_tar, targets$ID)] tar_res <- c() for(n in 1:length(ID_tar)){ if(is.na(Name_tar[n])) next tar_res <- c(tar_res, paste0( Name_tar[n], " - ", add_tar[n], "(", round(score_tar[n], digits = 3), ")" )) } if(!length(tar_res)) return("-") # = only fragment matches tar_res <- paste(tar_res, collapse = ", ") return(tar_res) }else return("-") }else return("-") }else return("-") }, targets, links_profiles ) ############################################################## cat(" done.") #target_match[target_match != "-"] outcome[target_match != "-"] <- outcome[target_match != "-"] + 1000 # gives: target_match } ################################################################## ################################################################## # on search_insourcefrag ######################################### # -> profile itself must not be a fragment ####################### if(search_insourcefrag){ cat("\n doing search_insourcefrag ...") ############################################################## # is_insource_fragment -> profiles has match with an in-source-fragment from the target lists # with_insource_fragment -> profile co-elutes (EIC-based) with an is_insource_fragment-profile targets <- read.table(file = file.path(logfile[[1]], "dataframes", "targets.txt"), header = TRUE, sep = "\t", colClasses = "character") targets <- targets[grepl("fragment", targets$tag1, fixed = TRUE),, drop = FALSE] x <- profileList$index_prof[, "links"] is_insource_fragment <- sapply( x, function(x, targets, links_profiles){ if(x != 0){ if(length(links_profiles[[x]]$targ)){ if(dim(links_profiles[[x]]$targ)[1]){ z <- strsplit(links_profiles[[x]]$targ[,1], "_") ID_tar <- sapply(z, function(z) z[[1]]) add_tar <- sapply(z, function(z) z[[2]]) score_tar <- links_profiles[[x]]$targ[,3] Name_tar <- targets$Name[match(ID_tar, targets$ID)] tar_res <- c() for(n in 1:length(ID_tar)){ if(is.na(Name_tar[n])) next tar_res <- c(tar_res, paste0( Name_tar[n], " / ", add_tar[n], " / ", round(score_tar[n], digits = 3) )) } if(!length(tar_res)) return("-") # = only fragment matches tar_res <- paste(tar_res, collapse = ", ") return(tar_res) }else return("-") }else return("-") }else return("-") }, targets, links_profiles ) #is_insource_fragment[is_insource_fragment != "-"] those <- which(is_insource_fragment != "-") those <- profileList$index_prof[those, "profile_ID"] # just to be safe if(!length(those)) with_insource_fragment <- rep("-", ncol(profileList$index_prof)) else{ x <- profileList$index_prof[, "links"] with_insource_fragment <- sapply( x, function(x, those, links_profiles){ if(x != 0){ if(links_profiles[[x]]$ID %in% those) return("-") # not to report back in-source-fragments themselves if(length(links_profiles[[x]]$group$other)){ these <- match(those, links_profiles[[x]]$group$other) these <- those[!is.na(these)] if(length(these)){ # for same fragment -> filter best score match got_frag <- is_insource_fragment[these] got_frag <- strsplit(got_frag, ", ", fixed = TRUE)[[1]] got_frag <- data.frame( sapply(got_frag, function(x) trimws(strsplit(x, "/")[[1]][[1]])), sapply(got_frag, function(x) trimws(strsplit(x, "/")[[1]][[2]])), sapply(got_frag, function(x) as.numeric(trimws(strsplit(x, "/")[[1]][[3]]))) ) got_frag <- got_frag[order(got_frag[, 1], got_frag[, 3], decreasing = TRUE),, drop = FALSE] got_frag <- got_frag[!duplicated(got_frag[, 1]),, drop = FALSE] got_frag <- got_frag[order(got_frag[, 1], decreasing = FALSE),, drop = FALSE] # re-order to alphabetic got_frag <- apply(got_frag, MARGIN = 1, function(x){ paste0(x[1]," - ", x[2], " (", x[3], ")") }) got_frag <- paste(got_frag, collapse = ", ") return(got_frag) }else return("-") }else return("-") }else return("-") }, those, links_profiles ) } ############################################################## cat(" done.") #with_insource_fragment[with_insource_fragment != "-"] outcome[with_insource_fragment != "-"] <- outcome[with_insource_fragment != "-"] + 100 # gives: with_insource_fragment } ################################################################## ################################################################## # on search_homologues ########################################### if(search_homologues){ cat("\n doing search_homologues ...") ############################################################## x <- profileList$index_prof[, "links"] in_series <- sapply( x, function(x, links_profiles, profileList){ if(x != 0){ if(length(links_profiles[[x]]$files_in_homol)){ # get peak IDs for files_in_homol at_prof <- match(links_profiles[[x]]$ID, profileList[["index_prof"]][, 4]) peak_out <- enviMass:::read_profile_peaks( file_path = profileList_PLP_path, get_col = c(FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, FALSE, FALSE), from_row = profileList[["index_prof"]][at_prof, 1][[1]], to_row = profileList[["index_prof"]][at_prof, 2][[1]] ) those <- match(links_profiles[[x]]$files_in_homol, peak_out[, 3]) peak_out <- peak_out[those,, drop = FALSE] if(nrow(peak_out) > 5) peak_out <- peak_out[order(peak_out[, 1], decreasing = TRUE), ][1:5, ] these <- apply(peak_out[, 2:3, drop = FALSE], MARGIN = 1, FUN = function(x){ paste0(x[2], "(", x[1], ")")}) return(paste(these, collapse = ", ")) }else return("-") }else return("-") }, links_profiles, profileList ) ############################################################## cat(" done.") #load(file = file.path(logfile[[1]], "results", "componentization", "homologues", as.character(5))) #load(file = file.path(logfile[[1]], "results", "componentization", "homologues", "full_5")) #in_series[in_series != "-"] outcome[in_series != "-"] <- outcome[in_series != "-"] + 10 # gives: in_series } ################################################################## ################################################################## # on search_MS2masses ############################################ if(search_MS2masses){ cat("\n doing search_MS2masses ...") ############################################################## len <- nrow(profileList$index_prof) has_MSMS_fragment <- rep("NA", len) for(i in 1:len){ ########################################################## profID <- profileList$index_prof[i, "profile_ID"] ########################################################## # read MS2 scans ######################################### collect_ms2 <- enviMass:::profile_readMS2( profID = profID, profileList = profileList, verbose = FALSE, do_checks = FALSE, logfile = logfile, only_lower_mass = TRUE, number_scans = MS2_max_peaks, num_max_peaks = MS2_max_peaks, num_max_peak_scans = 15, restrict_masses = MS2_masses, mztol_restrict = 2 ) # collect_ms2[[i]][[j]][[n]][[m]] # - i: over profile peaks # - j: ==1 MS1 (peak), >1 MS2 (fragments -> scanTypes) # - n: over scans (APEX scan set in list names) # - m: over mass extraction widths = scanTypes ########################################################## # extract all masses ##################################### # -> also this is only directly used for MS2_setup == "DDA" # it is left in place as prefilter for MS2_setup == "DIA" masses <- lapply(collect_ms2, function(x){ if(length(x) > 1){ masses <- c() for(n in 2:length(x)){ # over scantypes if(is.null(x[[n]])) next for(m in 1:length(x[[n]])){ # over scans if(is.null(x[[n]][[m]])) next for(k in 1:length(x[[n]][[m]])){ # over mass extraction widths (may only be one) masses <- c(masses, x[[n]][[m]][[k]][, 1]) } } } return(masses) }else return(c()) }) masses <- unlist(masses) if(!length(masses)) next ########################################################## # search masses ########################################## len2 <- length(masses) peaklist <- cbind(masses, rep(1, len2), rep(1, len2)) getit <- enviMass:::search_peak( peaklist = peaklist, mz = MS2_masses, dmz = 2, # precheck ppm = FALSE, # mmu! RT = FALSE, dRT = FALSE ) if(all(getit == "FALSE")){ has_MSMS_fragment[i] <- "none" next } ########################################################## if(MS2_setup == "DIA"){ use_scantype <- logfile$method_setup[ ((logfile$method_setup$msLevel == 2) & (logfile$method_setup$used_scans == TRUE)) , "Scan type"] collect_ms2_filtered <- try({ enviMass:::profile_filterMS2( profID = profID, profileList = profileList, collect_ms2, scantype = use_scantype, verbose = FALSE, do_checks = TRUE, logfile, do_filter_intersect = FALSE, do_filter_occurence = FALSE, do_filter_pattern = FALSE, do_filter_peakshape = TRUE, filter_peakshape_max_n_files = MS2_max_peaks, filter_peakshape_min_data_points = 5, mztol = 2, # in mmu! path = FALSE # profileList should already contain peak list for comparisons ) }) if(FALSE){ # some debugging parameters scantype = use_scantype verbose = FALSE do_checks = TRUE do_filter_intersect = FALSE do_filter_occurence = FALSE do_filter_pattern = FALSE do_filter_peakshape = TRUE filter_peakshape_max_n_files = 15 filter_peakshape_min_data_points = 5 mztol = 2 # in mmu! path = FALSE # profileList should already contain peak list for comparisons } if(class(collect_ms2_filtered) == "try-error") stop("\nProblem 1002 in comparison script - please report this to mloos@envibee.ch") if(is.character(collect_ms2_filtered[[1]])){ # -> "No data available" has_MSMS_fragment[i] <- "none" next } collect_ms2_filtered <- collect_ms2_filtered[collect_ms2_filtered[, "filter_4"] >= 0.8, , drop = FALSE] if(!nrow(collect_ms2_filtered)){ has_MSMS_fragment[i] <- "none" next } len2 <- nrow(collect_ms2_filtered) peaklist <- cbind(collect_ms2_filtered[, 1], rep(1, len2), rep(1, len2)) getit <- enviMass:::search_peak( peaklist = peaklist, mz = MS2_masses, dmz = 2, # precheck ppm = FALSE, # mmu! RT = FALSE, dRT = FALSE ) if(all(getit == "FALSE")){ has_MSMS_fragment[i] <- "none" next } has_matches <- c() for(j in 1:length(MS2_masses)){ if(getit[j] == "FALSE") next #if(grepl("/", getit[j], fixed = TRUE)) stop() has <- as.numeric(strsplit(getit[j], " / ", fixed = TRUE)[[1]]) #if(length(has)) stop() has_files <- paste(sort(unique(collect_ms2_filtered[has, "fileID"])), collapse = ",") has_max_cor <- round(max(collect_ms2_filtered[has, "filter_4"]), digits = 2) has_matches <- c(has_matches, paste0(j, " (", has_max_cor, " - ", has_files, ")")) } #if(length(has_matches) > 1) stop() has_MSMS_fragment[i] <- paste(has_matches, collapse = " / ") } ########################################################## if(MS2_setup == "DDA"){ getit <- which(getit != "FALSE") #if(length(getit) > 2) stop() getit <- paste(getit, collapse = " / ") has_MSMS_fragment[i] <- getit } ########################################################## outcome[i] <- outcome[i] + 1 } ############################################################## cat(" done.") #has_MSMS_fragment[has_MSMS_fragment != "NA"] # gives: has_MSMS_fragment } ################################################################## ################################################################## if(search_insourcefrag){ # exclude in-source fragment results outcome[is_insource_fragment != "-"] <- 1111 } these <- (outcome != 1111) #sum(outcome[these] == 2112) results <- data.frame( profileList$index_prof[these, "profile_ID"], round(profileList$index_prof[these, "mean_mz"], digits = 6), round(profileList$index_prof[these, "mean_RT"], digits = 3), round(profileList$index_prof[these, "mean_RT"] / 60, digits = 3), round(log10(profileList$index_prof[these, "mean_int_sample"]), digits = 2), round(log10(profileList$index_prof[these, "max_int_sample"]), digits = 2), round(profileList$index_prof[these, "Mass defect"], digits = 6) ) has_names <- c("Profile ID", "mean m/z", "mean RT [s]", "mean RT [min]", "mean log int", "max log int", "mass defect") if(search_targetsuspects){ results <- cbind(results, target_match[these]) has_names <- c(has_names, "Target match") } if(search_insourcefrag){ results <- cbind(results, with_insource_fragment[these]) has_names <- c(has_names, "In-source fragment match") } if(search_homologues){ results <- cbind(results, in_series[these]) has_names <- c(has_names, "Homol. series") } if(search_MS2masses){ results <- cbind(results, has_MSMS_fragment[these]) has_names <- c(has_names, "MS/MS fragment match") } names(results) <- has_names results <- results[order(outcome[these], profileList$index_prof[these, "max_int_sample"], decreasing = TRUE),, drop = FALSE] if(in_mode == "positive") out_path <- file.path(logfile[[1]], "exports", "PFAS_positive.csv") if(in_mode == "negative") out_path <- file.path(logfile[[1]], "exports", "PFAS_negative.csv") write.csv( results, file = out_path, row.names = FALSE ) ################################################################## ################################################################## those_objects <- c( # clean up! may subtract objects at beginning of script and after ... "target_match", "with_insource_fragment", "is_insource_fragment", "in_series", "has_MSMS_fragment" ) if(length(those_objects)) for(i in 1:length(those_objects)){ if(exists(those_objects[i], envir = as.environment(".GlobalEnv"))) rm(list = (those_objects[i]), envir = as.environment(".GlobalEnv")) if(exists(those_objects[i])) rm(list = those_objects[i]) } ##################################################################