lgatto · September 2, 2021 15:19
diff --git a/mzdeltas.R b/mzdeltas.R
 ##' @title Compute the MZ deltas
 ##'
 ##' @description
 ##'
 ##' The M/Z delta plot illustrates the suitability of MS2 spectra for
 ##' identification by plotting the M/Z differences of the most intense
 ##' peaks. The resulting histogram should optimally shown outstanding
 ##' bars at amino acid residu masses. The plots have been described in
 ##' Foster et al. 2011.
 ##'
 ##' Only a certain percentage of most intense MS2 peaks are taken into
 ##' account to use the most significant signal. Default value is 10%
 ##' (see `percentage` argument). The difference between peaks is then
 ##' computed for all individual spectra and their distribution is
 ##' plotted as a histogram where single bars represent 1 M/Z
 ##' differences. Delta M/Z between 40 and 200 are plotted by default,
 ##' to encompass the residue masses of all amino acids and several
 ##' common contaminants, although this can be changes with the `xlim`
 ##' argument.
 ##'
 ##' In addition to the processing described above, isobaric reporter
 ##' tag peaks and the precursor peak can also be removed from the MS2
 ##' spectrum, to avoid interence with the fragment peaks.
 ##'
 ##' Note that figures in Foster et al. 2011 have been produced and
 ##' optimised for centroided data. It is recommended to use centroided
 ##' data.
 ##'
 ##' @references
 ##'
 ##' Foster JM, Degroeve S, Gatto L, Visser M, Wang R, Griss
 ##' J, et al. A posteriori quality control for the curation and reuse
 ##' of public proteomics data. Proteomics. 2011;11:
 ##' 2182–2194. http://dx.doi.org/10.1002/pmic.201000602
 ##'
 ##' @param object An instance of class `Spectra()`.
 ##'
 ##' @param BPPARAM An optional `BiocParallelParam` instance
 ##'     determining the parallel back-end to be used during
 ##'     evaluation. Default is to use `BiocParallel::bpparam()`. See
 ##'     `?BiocParallel::bpparam` for details.
 ##'
 ##' @param ... Parameters `percentage` and `xlim` passed to the
 ##'     internal function.
 ##'
 ##' @param pks A `matrix` with two columns, named `mz` and
 ##'     `intensity`, containing MS2 peaks.
 ##'
 ##' @param percentage `numeric(1)` between 0 and 1 indicating the
 ##'     percentage of peaks per MS2 spectrum to include in the
 ##'     calculation. Default is 0.8.
 ##'
 ##' @param xlim `numeric(2)` with the upper and lower MZ to be used to
 ##'     calculate the MZ deltas. Default is `c(40, 200)`.
 ##'
 ##' @param aaLabels `logical(1)` defining whether the amino acids
 ##'     should be labelled on the histogram. Default is `TRUE`.
 ##'
 ##' @return `computeMzDeltas()` returns a `list` of numeric
 ##'     vectors. `plotMzDelta()` is used to visualise of M/Z delta
 ##'     distributions and returns the M/Z deltas histogram.
 ##'
 ##' @author Laurent Gatto with contributions of Guangchuang Yu.
 ##'
 ##' @rdname plotMzDelta
 ##'
 ##' @examples
 ##' library(Spectra)
 ##' library(rpx)
 ##'
 ##' px <- PXDataset("PXD000001")
 ##' f <- pxget(px, "TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01-20141210.mzXML")
 ##' sp <- Spectra(f)
 ##'
 ##' d <- computeMzDeltas(sp)
 ##' plotMzDelta(d)
 ##' ggPlotMzDelta(d)
 NULL

 ##' @rdname plotMzDelta
 ##'
 ##' @importFrom stats quantile
 compute_mz_deltas <- function(pks,
                              percentage = 0.8,
                              xlim = c(40, 200)) {
    ## only keep top intensity peaks
    sel <- pks[, "intensity"] > quantile(pks[, "intensity"], percentage)
    ## keep mz values of these top peaks
    mzs <- pks[sel, "mz"]
    ## prepare list for all delta mzs
    delta <- vector("list", length = length(mzs))
    i <- 1
    ## compute all delta mzs for 1st, 2nd, ... to last peak
    while (length(mzs) > 1) {
        m <- mzs[1]
        mzs <- mzs[-1]
        delta[[i]] <- abs(mzs - m)
        i <- i + 1
    }
    delta <- unlist(delta)
    ## only keep deltas that are relevant for aa masses
    delta[delta > xlim[1] & delta < xlim[2]]
 }

 ##' @rdname plotMzDelta
 ##'
 ##' @import BiocParallel
 computeMzDeltas <- function(object,
                            BPPARAM = BiocParallel::bpparam(),
                            ...) {
    stopifnot(require("BiocParallel"))
    BiocParallel::bplapply(peaksData(filterMsLevel(object, 2)),
                           compute_mz_deltas,
                           ...,
                           BPPARAM = BPPARAM)
 }

 ##' @rdname plotMzDelta
 ##'
 ##' @import ggplot2
 ggPlotMzDelta <- function(delta, aaLabels = TRUE) {
    stopifnot(require("ggplot2"))
    ## from PSM::getAminoAcids()
    amino_acids <-
        structure(list(AA = c("peg", "A", "R", "N", "D", "C", "E", "Q",
                              "G", "H", "I", "L", "K", "M", "F", "P", "S",
                              "T", "W", "Y", "V"),
                       ResidueMass = c(44, 71.03711, 156.10111, 114.04293,
                                       115.02694, 103.00919, 129.04259,
                                       128.05858, 57.02146, 137.05891,
                                       113.08406, 113.08406, 128.09496,
                                       131.04049, 147.06841, 97.05276,
                                       87.03203, 101.04768, 186.07931,
                                       163.06333, 99.06841),
                       Abbrev3 = c(NA, "Ala", "Arg", "Asn", "Asp", "Cys",
                                   "Glu", "Gln", "Gly", "His", "Ile",
                                   "Leu", "Lys", "Met", "Phe", "Pro",
                                   "Ser", "Thr", "Trp", "Tyr", "Val"),
                       ImmoniumIonMass = c(NA, 44.05003, 129.114,
                                           87.05584, 88.03986, 76.0221,
                                           102.0555, 101.0715, 30.03438,
                                           110.0718, 86.09698, 86.09698,
                                           101.1079, 104.0534, 120.0813,
                                           70.06568, 60.04494, 74.06059,
                                           159.0922, 136.0762, 72.08133),
                       Name = c("Polyethylene glycol", "Alanine",
                                "Arginine", "Asparagine", "Aspartic acid",
                                "Cysteine", "Glutamic acid", "Glutamine",
                                "Glycine", "Histidine", "Isoleucine",
                                "Leucine", "Lysine", "Methionine",
                                "Phenylalanine", "Proline", "Serine",
                                "Threonine", "Tryptophan", "Tyrosine",
                                "Valine"),
                       Hydrophobicity = c(NA, 0.62, -2.53, -0.78, -0.9,
                                          0.29, -0.74, -0.85, 0.48, -0.4,
                                          1.38, 1.06, -1.5, 0.64, 1.19,
                                          0.12, -0.18, -0.05, 0.81, 0.26,
                                          1.08),
                       Hydrophilicity = c(NA, -0.5, 3, 0.2, 3, -1, 3, 0.2,
                                          0, -0.5, -1.8, -1.8, 3, -1.3,
                                          -2.5, 0, 0.3, -0.4, -3.4, -2.3,
                                          -1.5),
                       SideChainMass = c(NA, 15, 101, 58, 59, 47, 73, 72,
                                         1, 82, 57, 57, 73, 75, 91, 42,
                                         31, 45, 130, 107, 43),
                       pK1 = c(NA, 2.35, 2.18, 2.18, 1.88, 1.71, 2.19,
                               2.17, 2.34, 1.78, 2.32, 2.36, 2.2, 2.28,
                               2.58, 1.99, 2.21, 2.15, 2.38, 2.2, 2.29),
                       pK2 = c(NA, 9.87, 9.09, 9.09, 9.6, 10.78, 9.67,
                               9.13, 9.6, 8.97, 9.76, 9.6, 8.9, 9.21,
                               9.24, 10.6, 9.15, 9.12, 9.39, 9.11, 9.74),
                       pI = c(NA, 6.11, 10.76, 10.76, 2.98, 5.02, 3.08,
                              5.65, 6.06, 7.64, 6.04, 6.04, 9.47, 5.74,
                              5.91, 6.3, 5.68, 5.6, 5.88, 5.63, 6.02)),
                  class = "data.frame",
                  row.names = c(NA, -21L))
    ResidueMass <- ..density.. <- NULL ## to accomodate codetools
    value <- AA <- NULL
    delta <- data.frame(value = unlist(delta))
    p <- ggplot(delta, aes(x = value)) +
        geom_histogram(aes(y = ..density..), stat = "bin", binwidth = 1) +
        xlab("M/Z delta") + ylab("Density") +
        ggtitle("Histogram of Mass Delta Distribution")
    ## Adding annotations
    if (aaLabels) {
        y_offset <- x_offset <- rep(0.5, 21)
        names(y_offset) <- names(x_offset) <- amino_acids$AA
        x_offset[c("I", "L", "K", "Q")] <- 1
        y_offset[c("V", "C")] <- 1
        y_offset[c("P", "T")] <- 0
        y_offset[c("N", "E")] <- 1
        y_offset[c("K", "Q", "I", "L")] <- 0
        y_offset[c("D", "M")] <- 0
        aa <- cbind(amino_acids, x_offset, y_offset)
        ## removing Isoleucine, as it has the same residue mass
        ## as leucine, and updating leucine's label to I/L
        aa$AA <- as.character(aa$AA)
        aa[aa$AA == "I", "ResidueMass"] <- NA
        aa[aa$AA == "L", "AA"] <- "I/L"
        ## Removing Q as it is too close to K to show
        ## up correctly and updating K label to K/Q
        aa[aa$AA == "Q", "ResidueMass"] <- NA
        aa[aa$AA == "K", "AA"] <- "K/Q"
        p <- p +
            geom_vline(data = aa,
                       aes(xintercept = ResidueMass,
                           colour = AA),
                       alpha = I(1/2)) +
            geom_text(data = aa,
                      aes(x = ResidueMass,
                          y = -0.001, label = AA,
                          vjust = y_offset,
                          hjust = x_offset),
                      size = 2.5) +
            theme(legend.position = "none")
    }
    p
 }

 plotMzDelta <- function(delta, aaLabels = TRUE) {
    ## from PSM::getAminoAcids()
    amino_acids <-
        structure(list(AA = c("peg", "A", "R", "N", "D", "C", "E", "Q",
                              "G", "H", "I", "L", "K", "M", "F", "P", "S",
                              "T", "W", "Y", "V"),
                       ResidueMass = c(44, 71.03711, 156.10111, 114.04293,
                                       115.02694, 103.00919, 129.04259,
                                       128.05858, 57.02146, 137.05891,
                                       113.08406, 113.08406, 128.09496,
                                       131.04049, 147.06841, 97.05276,
                                       87.03203, 101.04768, 186.07931,
                                       163.06333, 99.06841),
                       Abbrev3 = c(NA, "Ala", "Arg", "Asn", "Asp", "Cys",
                                   "Glu", "Gln", "Gly", "His", "Ile",
                                   "Leu", "Lys", "Met", "Phe", "Pro",
                                   "Ser", "Thr", "Trp", "Tyr", "Val"),
                       ImmoniumIonMass = c(NA, 44.05003, 129.114,
                                           87.05584, 88.03986, 76.0221,
                                           102.0555, 101.0715, 30.03438,
                                           110.0718, 86.09698, 86.09698,
                                           101.1079, 104.0534, 120.0813,
                                           70.06568, 60.04494, 74.06059,
                                           159.0922, 136.0762, 72.08133),
                       Name = c("Polyethylene glycol", "Alanine",
                                "Arginine", "Asparagine", "Aspartic acid",
                                "Cysteine", "Glutamic acid", "Glutamine",
                                "Glycine", "Histidine", "Isoleucine",
                                "Leucine", "Lysine", "Methionine",
                                "Phenylalanine", "Proline", "Serine",
                                "Threonine", "Tryptophan", "Tyrosine",
                                "Valine"),
                       Hydrophobicity = c(NA, 0.62, -2.53, -0.78, -0.9,
                                          0.29, -0.74, -0.85, 0.48, -0.4,
                                          1.38, 1.06, -1.5, 0.64, 1.19,
                                          0.12, -0.18, -0.05, 0.81, 0.26,
                                          1.08),
                       Hydrophilicity = c(NA, -0.5, 3, 0.2, 3, -1, 3, 0.2,
                                          0, -0.5, -1.8, -1.8, 3, -1.3,
                                          -2.5, 0, 0.3, -0.4, -3.4, -2.3,
                                          -1.5),
                       SideChainMass = c(NA, 15, 101, 58, 59, 47, 73, 72,
                                         1, 82, 57, 57, 73, 75, 91, 42,
                                         31, 45, 130, 107, 43),
                       pK1 = c(NA, 2.35, 2.18, 2.18, 1.88, 1.71, 2.19,
                               2.17, 2.34, 1.78, 2.32, 2.36, 2.2, 2.28,
                               2.58, 1.99, 2.21, 2.15, 2.38, 2.2, 2.29),
                       pK2 = c(NA, 9.87, 9.09, 9.09, 9.6, 10.78, 9.67,
                               9.13, 9.6, 8.97, 9.76, 9.6, 8.9, 9.21,
                               9.24, 10.6, 9.15, 9.12, 9.39, 9.11, 9.74),
                       pI = c(NA, 6.11, 10.76, 10.76, 2.98, 5.02, 3.08,
                              5.65, 6.06, 7.64, 6.04, 6.04, 9.47, 5.74,
                              5.91, 6.3, 5.68, 5.6, 5.88, 5.63, 6.02)),
                  class = "data.frame",
                  row.names = c(NA, -21L))
    col <- "grey30"
    hist(unlist(d), breaks = 200, col = col, border = col,
         ylab = "Frequency", xlab = "M/Z delta",
         main = "Histrogram of Mass Delta Distributions")
    if (aaLabels) {
        usr <- par("usr")
        aa_labels <- amino_acids$AA
        aa_labels[aa_labels == "I"] <- "I/L"
        aa_labels[aa_labels == "L"] <- ""
        aa_labels[aa_labels == "Q"] <- "Q/K"
        aa_labels[aa_labels == "K"] <- ""
        amino_acids$label <- aa_labels
        amino_acids$x_adj <- 1
        amino_acids$y_adj <- 0.97
        amino_acids$y_adj[amino_acids$label == "I/L"] <- 0.95
        amino_acids$x_adj[amino_acids$label == "I/L"] <- 0.985
        amino_acids$y_adj[amino_acids$label == "D"] <- 0.95
        amino_acids$x_adj[amino_acids$label == "D"] <- 1.008
        amino_acids$y_adj[amino_acids$label == "Q/K"] <- 0.95
        amino_acids$x_adj[amino_acids$label == "Q/K"] <- 0.985
        amino_acids$y_adj[amino_acids$label == "M"] <- 0.95
        amino_acids$x_adj[amino_acids$label == "M"] <- 1.0075
        segments(amino_acids$ResidueMass, 0,
                 amino_acids$ResidueMass, usr[4] * 0.95,
                 col = "red", lwd = 1, lty =  "dashed")
        text(amino_acids$ResidueMass  * amino_acids$x_adj,
             usr[4] * amino_acids$y_adj,
             amino_acids$label)
    }
 }
	##' @title Compute the MZ deltas
	##'
	##' @description
	##'
	##' The M/Z delta plot illustrates the suitability of MS2 spectra for
	##' identification by plotting the M/Z differences of the most intense
	##' peaks. The resulting histogram should optimally shown outstanding
	##' bars at amino acid residu masses. The plots have been described in
	##' Foster et al. 2011.
	##'
	##' Only a certain percentage of most intense MS2 peaks are taken into
	##' account to use the most significant signal. Default value is 10%
	##' (see `percentage` argument). The difference between peaks is then
	##' computed for all individual spectra and their distribution is
	##' plotted as a histogram where single bars represent 1 M/Z
	##' differences. Delta M/Z between 40 and 200 are plotted by default,
	##' to encompass the residue masses of all amino acids and several
	##' common contaminants, although this can be changes with the `xlim`
	##' argument.
	##'
	##' In addition to the processing described above, isobaric reporter
	##' tag peaks and the precursor peak can also be removed from the MS2
	##' spectrum, to avoid interence with the fragment peaks.
	##'
	##' Note that figures in Foster et al. 2011 have been produced and
	##' optimised for centroided data. It is recommended to use centroided
	##' data.
	##'
	##' @references
	##'
	##' Foster JM, Degroeve S, Gatto L, Visser M, Wang R, Griss
	##' J, et al. A posteriori quality control for the curation and reuse
	##' of public proteomics data. Proteomics. 2011;11:
	##' 2182–2194. http://dx.doi.org/10.1002/pmic.201000602
	##'
	##' @param object An instance of class `Spectra()`.
	##'
	##' @param BPPARAM An optional `BiocParallelParam` instance
	##' determining the parallel back-end to be used during
	##' evaluation. Default is to use `BiocParallel::bpparam()`. See
	##' `?BiocParallel::bpparam` for details.
	##'
	##' @param ... Parameters `percentage` and `xlim` passed to the
	##' internal function.
	##'
	##' @param pks A `matrix` with two columns, named `mz` and
	##' `intensity`, containing MS2 peaks.
	##'
	##' @param percentage `numeric(1)` between 0 and 1 indicating the
	##' percentage of peaks per MS2 spectrum to include in the
	##' calculation. Default is 0.8.
	##'
	##' @param xlim `numeric(2)` with the upper and lower MZ to be used to
	##' calculate the MZ deltas. Default is `c(40, 200)`.
	##'
	##' @param aaLabels `logical(1)` defining whether the amino acids
	##' should be labelled on the histogram. Default is `TRUE`.
	##'
	##' @return `computeMzDeltas()` returns a `list` of numeric
	##' vectors. `plotMzDelta()` is used to visualise of M/Z delta
	##' distributions and returns the M/Z deltas histogram.
	##'
	##' @author Laurent Gatto with contributions of Guangchuang Yu.
	##'
	##' @rdname plotMzDelta
	##'
	##' @examples
	##' library(Spectra)
	##' library(rpx)
	##'
	##' px <- PXDataset("PXD000001")
	##' f <- pxget(px, "TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01-20141210.mzXML")
	##' sp <- Spectra(f)
	##'
	##' d <- computeMzDeltas(sp)
	##' plotMzDelta(d)
	##' ggPlotMzDelta(d)
	NULL

	##' @rdname plotMzDelta
	##'
	##' @importFrom stats quantile
	compute_mz_deltas <- function(pks,
	percentage = 0.8,
	xlim = c(40, 200)) {
	## only keep top intensity peaks
	sel <- pks[, "intensity"] > quantile(pks[, "intensity"], percentage)
	## keep mz values of these top peaks
	mzs <- pks[sel, "mz"]
	## prepare list for all delta mzs
	delta <- vector("list", length = length(mzs))
	i <- 1
	## compute all delta mzs for 1st, 2nd, ... to last peak
	while (length(mzs) > 1) {
	m <- mzs[1]
	mzs <- mzs[-1]
	delta[[i]] <- abs(mzs - m)
	i <- i + 1
	}
	delta <- unlist(delta)
	## only keep deltas that are relevant for aa masses
	delta[delta > xlim[1] & delta < xlim[2]]
	}

	##' @rdname plotMzDelta
	##'
	##' @import BiocParallel
	computeMzDeltas <- function(object,
	BPPARAM = BiocParallel::bpparam(),
	...) {
	stopifnot(require("BiocParallel"))
	BiocParallel::bplapply(peaksData(filterMsLevel(object, 2)),
	compute_mz_deltas,
	...,
	BPPARAM = BPPARAM)
	}

	##' @rdname plotMzDelta
	##'
	##' @import ggplot2
	ggPlotMzDelta <- function(delta, aaLabels = TRUE) {
	stopifnot(require("ggplot2"))
	## from PSM::getAminoAcids()
	amino_acids <-
	structure(list(AA = c("peg", "A", "R", "N", "D", "C", "E", "Q",
	"G", "H", "I", "L", "K", "M", "F", "P", "S",
	"T", "W", "Y", "V"),
	ResidueMass = c(44, 71.03711, 156.10111, 114.04293,
	115.02694, 103.00919, 129.04259,
	128.05858, 57.02146, 137.05891,
	113.08406, 113.08406, 128.09496,
	131.04049, 147.06841, 97.05276,
	87.03203, 101.04768, 186.07931,
	163.06333, 99.06841),
	Abbrev3 = c(NA, "Ala", "Arg", "Asn", "Asp", "Cys",
	"Glu", "Gln", "Gly", "His", "Ile",
	"Leu", "Lys", "Met", "Phe", "Pro",
	"Ser", "Thr", "Trp", "Tyr", "Val"),
	ImmoniumIonMass = c(NA, 44.05003, 129.114,
	87.05584, 88.03986, 76.0221,
	102.0555, 101.0715, 30.03438,
	110.0718, 86.09698, 86.09698,
	101.1079, 104.0534, 120.0813,
	70.06568, 60.04494, 74.06059,
	159.0922, 136.0762, 72.08133),
	Name = c("Polyethylene glycol", "Alanine",
	"Arginine", "Asparagine", "Aspartic acid",
	"Cysteine", "Glutamic acid", "Glutamine",
	"Glycine", "Histidine", "Isoleucine",
	"Leucine", "Lysine", "Methionine",
	"Phenylalanine", "Proline", "Serine",
	"Threonine", "Tryptophan", "Tyrosine",
	"Valine"),
	Hydrophobicity = c(NA, 0.62, -2.53, -0.78, -0.9,
	0.29, -0.74, -0.85, 0.48, -0.4,
	1.38, 1.06, -1.5, 0.64, 1.19,
	0.12, -0.18, -0.05, 0.81, 0.26,
	1.08),
	Hydrophilicity = c(NA, -0.5, 3, 0.2, 3, -1, 3, 0.2,
	0, -0.5, -1.8, -1.8, 3, -1.3,
	-2.5, 0, 0.3, -0.4, -3.4, -2.3,
	-1.5),
	SideChainMass = c(NA, 15, 101, 58, 59, 47, 73, 72,
	1, 82, 57, 57, 73, 75, 91, 42,
	31, 45, 130, 107, 43),
	pK1 = c(NA, 2.35, 2.18, 2.18, 1.88, 1.71, 2.19,
	2.17, 2.34, 1.78, 2.32, 2.36, 2.2, 2.28,
	2.58, 1.99, 2.21, 2.15, 2.38, 2.2, 2.29),
	pK2 = c(NA, 9.87, 9.09, 9.09, 9.6, 10.78, 9.67,
	9.13, 9.6, 8.97, 9.76, 9.6, 8.9, 9.21,
	9.24, 10.6, 9.15, 9.12, 9.39, 9.11, 9.74),
	pI = c(NA, 6.11, 10.76, 10.76, 2.98, 5.02, 3.08,
	5.65, 6.06, 7.64, 6.04, 6.04, 9.47, 5.74,
	5.91, 6.3, 5.68, 5.6, 5.88, 5.63, 6.02)),
	class = "data.frame",
	row.names = c(NA, -21L))
	ResidueMass <- ..density.. <- NULL ## to accomodate codetools
	value <- AA <- NULL
	delta <- data.frame(value = unlist(delta))
	p <- ggplot(delta, aes(x = value)) +
	geom_histogram(aes(y = ..density..), stat = "bin", binwidth = 1) +
	xlab("M/Z delta") + ylab("Density") +
	ggtitle("Histogram of Mass Delta Distribution")
	## Adding annotations
	if (aaLabels) {
	y_offset <- x_offset <- rep(0.5, 21)
	names(y_offset) <- names(x_offset) <- amino_acids$AA
	x_offset[c("I", "L", "K", "Q")] <- 1
	y_offset[c("V", "C")] <- 1
	y_offset[c("P", "T")] <- 0
	y_offset[c("N", "E")] <- 1
	y_offset[c("K", "Q", "I", "L")] <- 0
	y_offset[c("D", "M")] <- 0
	aa <- cbind(amino_acids, x_offset, y_offset)
	## removing Isoleucine, as it has the same residue mass
	## as leucine, and updating leucine's label to I/L
	aa$AA <- as.character(aa$AA)
	aa[aa$AA == "I", "ResidueMass"] <- NA
	aa[aa$AA == "L", "AA"] <- "I/L"
	## Removing Q as it is too close to K to show
	## up correctly and updating K label to K/Q
	aa[aa$AA == "Q", "ResidueMass"] <- NA
	aa[aa$AA == "K", "AA"] <- "K/Q"
	p <- p +
	geom_vline(data = aa,
	aes(xintercept = ResidueMass,
	colour = AA),
	alpha = I(1/2)) +
	geom_text(data = aa,
	aes(x = ResidueMass,
	y = -0.001, label = AA,
	vjust = y_offset,
	hjust = x_offset),
	size = 2.5) +
	theme(legend.position = "none")
	}
	p
	}

	plotMzDelta <- function(delta, aaLabels = TRUE) {
	## from PSM::getAminoAcids()
	amino_acids <-
	structure(list(AA = c("peg", "A", "R", "N", "D", "C", "E", "Q",
	"G", "H", "I", "L", "K", "M", "F", "P", "S",
	"T", "W", "Y", "V"),
	ResidueMass = c(44, 71.03711, 156.10111, 114.04293,
	115.02694, 103.00919, 129.04259,
	128.05858, 57.02146, 137.05891,
	113.08406, 113.08406, 128.09496,
	131.04049, 147.06841, 97.05276,
	87.03203, 101.04768, 186.07931,
	163.06333, 99.06841),
	Abbrev3 = c(NA, "Ala", "Arg", "Asn", "Asp", "Cys",
	"Glu", "Gln", "Gly", "His", "Ile",
	"Leu", "Lys", "Met", "Phe", "Pro",
	"Ser", "Thr", "Trp", "Tyr", "Val"),
	ImmoniumIonMass = c(NA, 44.05003, 129.114,
	87.05584, 88.03986, 76.0221,
	102.0555, 101.0715, 30.03438,
	110.0718, 86.09698, 86.09698,
	101.1079, 104.0534, 120.0813,
	70.06568, 60.04494, 74.06059,
	159.0922, 136.0762, 72.08133),
	Name = c("Polyethylene glycol", "Alanine",
	"Arginine", "Asparagine", "Aspartic acid",
	"Cysteine", "Glutamic acid", "Glutamine",
	"Glycine", "Histidine", "Isoleucine",
	"Leucine", "Lysine", "Methionine",
	"Phenylalanine", "Proline", "Serine",
	"Threonine", "Tryptophan", "Tyrosine",
	"Valine"),
	Hydrophobicity = c(NA, 0.62, -2.53, -0.78, -0.9,
	0.29, -0.74, -0.85, 0.48, -0.4,
	1.38, 1.06, -1.5, 0.64, 1.19,
	0.12, -0.18, -0.05, 0.81, 0.26,
	1.08),
	Hydrophilicity = c(NA, -0.5, 3, 0.2, 3, -1, 3, 0.2,
	0, -0.5, -1.8, -1.8, 3, -1.3,
	-2.5, 0, 0.3, -0.4, -3.4, -2.3,
	-1.5),
	SideChainMass = c(NA, 15, 101, 58, 59, 47, 73, 72,
	1, 82, 57, 57, 73, 75, 91, 42,
	31, 45, 130, 107, 43),
	pK1 = c(NA, 2.35, 2.18, 2.18, 1.88, 1.71, 2.19,
	2.17, 2.34, 1.78, 2.32, 2.36, 2.2, 2.28,
	2.58, 1.99, 2.21, 2.15, 2.38, 2.2, 2.29),
	pK2 = c(NA, 9.87, 9.09, 9.09, 9.6, 10.78, 9.67,
	9.13, 9.6, 8.97, 9.76, 9.6, 8.9, 9.21,
	9.24, 10.6, 9.15, 9.12, 9.39, 9.11, 9.74),
	pI = c(NA, 6.11, 10.76, 10.76, 2.98, 5.02, 3.08,
	5.65, 6.06, 7.64, 6.04, 6.04, 9.47, 5.74,
	5.91, 6.3, 5.68, 5.6, 5.88, 5.63, 6.02)),
	class = "data.frame",
	row.names = c(NA, -21L))
	col <- "grey30"
	hist(unlist(d), breaks = 200, col = col, border = col,
	ylab = "Frequency", xlab = "M/Z delta",
	main = "Histrogram of Mass Delta Distributions")
	if (aaLabels) {
	usr <- par("usr")
	aa_labels <- amino_acids$AA
	aa_labels[aa_labels == "I"] <- "I/L"
	aa_labels[aa_labels == "L"] <- ""
	aa_labels[aa_labels == "Q"] <- "Q/K"
	aa_labels[aa_labels == "K"] <- ""
	amino_acids$label <- aa_labels
	amino_acids$x_adj <- 1
	amino_acids$y_adj <- 0.97
	amino_acids$y_adj[amino_acids$label == "I/L"] <- 0.95
	amino_acids$x_adj[amino_acids$label == "I/L"] <- 0.985
	amino_acids$y_adj[amino_acids$label == "D"] <- 0.95
	amino_acids$x_adj[amino_acids$label == "D"] <- 1.008
	amino_acids$y_adj[amino_acids$label == "Q/K"] <- 0.95
	amino_acids$x_adj[amino_acids$label == "Q/K"] <- 0.985
	amino_acids$y_adj[amino_acids$label == "M"] <- 0.95
	amino_acids$x_adj[amino_acids$label == "M"] <- 1.0075
	segments(amino_acids$ResidueMass, 0,
	amino_acids$ResidueMass, usr[4] * 0.95,
	col = "red", lwd = 1, lty = "dashed")
	text(amino_acids$ResidueMass * amino_acids$x_adj,
	usr[4] * amino_acids$y_adj,
	amino_acids$label)
	}
	}