My BibTeX Entries

% Journal article: NMR and DFT study of how proline ring substituents affect collagen stability.
@ARTICLE{derider02collagen,
  author = {DeRider, Michele L. and Wilkens, Steven J. and Waddell, Michael J.
            and Bretscher, Lynn E. and Weinhold, Frank and Raines, Ronald T.
            and Markley, John L.},
  title = {Collagen Stability: Insights from {NMR} Spectroscopic and Hybrid
           Density Functional Computational Investigations of the Effect of
           Electronegative Substituents on Prolyl Ring Conformations},
  journal = {Journal of the American Chemical Society},
  volume = {124},
  number = {11},
  year = {2002},
  month = mar,
  pages = {2497--2505},
  abstract = {
     Collagen-like peptides of the type (Pro-Pro-Gly)(10) fold into stable
     triple helices. An electron-withdrawing substituent at the H(gamma)(3)
     ring position of the second proline residue stabilizes these triple
     helices. The aim of this study was to reveal the structural and energetic
     origins of this effect. The approach was to obtain experimental NMR data
     on model systems and to use these results to validate computational
     chemical analyses of these systems. The most striking effects of an
     electron-withdrawing substituent are on the ring pucker of the substituted
     proline (Pro(i)) and on the trans/cis ratio of the Xaa(i-1)-Pro(i) peptide
     bond. NMR experiments demonstrated that N-acetylproline methyl ester
     (AcProOMe) exists in both the C(gamma)-endo and C(gamma)-exo conformations
     (with the endo conformation slightly preferred),
     N-acetyl-4(R)-fluoroproline methyl ester (Ac-4R-FlpOMe) exists almost
     exclusively in the C(gamma)-exo conformation, and
     N-acetyl-4(S)-fluoroproline methyl ester (Ac-4S-FlpOMe) exists almost
     exclusively in the C(gamma)-endo conformation. In dioxane, the
     K(trans/cis) values for AcProOMe, Ac-4R-FlpOMe, and Ac-4S-FlpOMe are 3.0,
     4.0, and 1.2, respectively. Density functional theory (DFT) calculations
     with the (hybrid) B3LYP method were in good agreement with the
     experimental data. Computational analysis with the natural bond orbital
     (NBO) paradigm shows that the pucker preference of the substituted prolyl
     ring is due to the gauche effect. The backbone torsional angles, phi and
     psi, were shown to correlate with ring pucker, which in turn correlates
     with the known phi and psi angles in collagen-like peptides. The
     difference in K(trans/cis) between AcProOMe and Ac-4R-FlpOMe is due to an
     n-->pi interaction associated with the B{\"{u}}rgi-Dunitz trajectory. The
     decrease in K(trans/cis) for Ac-4S-FlpOMe can be explained by
     destabilization of the trans isomer because of unfavorable electronic and
     steric interactions. Analysis of the results herein along with the
     structures of collagen-like peptides has led to a theory that links
     collagen stability to the interplay between the pyrrolidine ring pucker,
     phi and psi torsional angles, and peptide bond trans/cis ratio of
     substituted proline residues.},
  url = {http://pubs.acs.org/journals/jacsat/article.cgi/jacsat/2002/124/i11/pdf/ja0166904.pdf},
  pdf = {http://www.waddellinformatics.com/download/derider02collagen.pdf},
  ps = {http://www.waddellinformatics.com/download/derider02collagen.ps.gz},
  keywords = {nmr, collagen, nbo, proline}
}
% Technical report no. 1453: comparison of data mining methods on multiple myeloma microarray data.
@TECHREPORT{page02comparative,
  author = {Page, David and Zhan, Fenghuang and Cussens, James
            and Waddell, Michael and Hardin, Johanna and Barlogie, Bart
            and Shaughnessy, Jr., John},
  title = {Comparative Data Mining for Microarrays: A Case Study Based on
           Multiple Myeloma},
  institution = {Computer Sciences Department, University of Wisconsin},
  number = {1453},
  year = {2002},
  month = nov,
  abstract = {
     Supervised machine learning and data mining tools have become popular for
     the analysis of gene expression microarray data. They have the potential
     to uncover new therapeutic targets for diseases, to predict how patients
     will respond to specific treatments, and to uncover regulatory
     relationships among genes in normal and disease situations. Comparative
     experiments are needed to identify the advantages of the leading
     supervised learning algorithms for microarray data, as well as to give
     direction in methodological decisions. This paper compares support vector
     machines, Bayesian networks, decision trees, boosted decision trees, and
     voting (ensembles of decision stumps) on a new microarray data set for
     cancer with over 100 samples. The paper provides evidence for several
     important lessons for mining microarray data, including: (1) Bayes nets
     and ensembles perform at least as well as other approaches but arguably
     provide more direct insight; (2) the common practice of throwing out low
     or negative average differences, or those accompanied by an absent call,
     is a mistake; (3) looking for consistent differences in expression may
     be more important than large differences.},
  url = {ftp://ftp.cs.wisc.edu/pub/tech-reports/reports/2002/tr1453.ps.Z},
  pdf = {http://www.waddellinformatics.com/download/page02comparative.pdf},
  ps = {http://www.waddellinformatics.com/download/page02comparative.ps.gz},
  keywords = {microarray, myeloma, genomics, algorithms, svms, bayes, trees,
              ensembles, eov}
}
% Conference paper in the Euro-Par 2003 proceedings (Lecture Notes in Computer Science, vol. 2790).
@INPROCEEDINGS{dutra03toward,
  author = {Dutra, In{\^{e}}s de Castro and Page, David and Costa, Vitor
            Santos and Shavlik, Jude W. and Waddell, Michael},
  title = {Toward Automatic Management of Embarrassingly Parallel Applications},
  booktitle = {Euro-Par 2003. Parallel Processing, 9th International Euro-Par
               Conference, Klagenfurt, Austria, August 26-29, 2003.
               Proceedings},
  editor = {Harald Kosch and
               L{\'{a}}szl{\'{o}} B{\"{o}}sz{\"{o}}rm{\'{e}}nyi and
               Hermann Hellwagner},
  publisher = {Springer-Verlag},
  series = {Lecture Notes in Computer Science},
  volume = {2790},
  isbn = {3-540-40788-X},
  month = aug,
  year = {2003},
  pages = {509--516},
  abstract = {
    Large-scale applications that require executing very large numbers of tasks
    are only feasible through parallelism. In this work we present a system
    that automatically handles large numbers of experiments and data in the
    context of machine learning. Our system controls all experiments, including
    re-submission of failed jobs and relies on available resource managers to
    spawn jobs through pools of machines. Our results show that we can manage
    a very large number of experiments, using a reasonable amount of idle CPU
    cycles, with very little user intervention.},
  url = {http://citeseer.nj.nec.com/599812.html},
  pdf = {http://www.waddellinformatics.com/download/dutra03toward.pdf},
  ps = {http://www.waddellinformatics.com/download/dutra03toward.ps.gz},
  keywords = {collaboration, parallelism}
}
% Journal article in AI Magazine on machine learning for gene-expression microarrays.
@article{molla04using,
  author   = {Michael Molla and Michael Waddell and David Page and Jude Shavlik},
  title    = {Using Machine Learning to Design and Interpret Gene-Expression
              Microarrays},
  journal  = {{AI} Magazine},
  year     = {2004},
  volume   = {25},
  number   = {1},
  pages    = {23--44},
  url      = {http://www.aaai.org/Papers/Magazine/Vol25/25-01/AIMag25-01-004.pdf},
  pdf      = {http://www.waddellinformatics.com/download/molla04using.pdf},
  ps       = {http://www.waddellinformatics.com/download/molla04using.ps.gz},
  keywords = {bioinformatics, microarray, genomics},
  abstract = {
    Gene-expression microarrays, commonly called 'gene chips,' make it
    possible to simultaneously measure the rate at which a cell or tissue is
    expressing - translating into a protein - each of its thousands of genes.
    One can use these comprehensive snapshots of biological activity to infer
    regulatory pathways in cells, identify novel targets for drug design, and
    improve the diagnosis, prognosis, and treatment planning for those
    suffering from disease. However, the amount of data this new technology
    produces is more than one can manually analyze. Hence, the need for
    automated analysis of microarray data offers an opportunity for machine
    learning to have a significant impact on biology and medicine. This
    article describes microarray technology, the data it produces, and the
    types of machine-learning tasks that naturally arise with this data. It
    also reviews some of the recent prominent applications of machine learning
    to gene-chip data, points to related tasks where machine learning may have
    a further impact on biology and medicine, and describes additional types of
    interesting data that recent advances in biotechnology allow biomedical
    researchers to collect.}
}
% Seminar talk at the Hilldale Undergraduate/Faculty Research Seminar, April 2000.
@MISC{waddell00theoretical1,
  author = {Waddell, Michael J.},
  title = {Theoretical Analysis of the Basis of Collagen Stability},
  note = {Hilldale Undergraduate/Faculty Research Seminar, University of
          Wisconsin-Madison, Madison, Wisconsin, April 24, 2000},
  howpublished = {Seminar},
  year = {2000},
  month = apr,
  keywords = {collagen, nbo, proline}
}
% Senior undergraduate thesis, May 2000.
@MISC{waddell00theoretical2,
  author = {Waddell, Michael J.},
  title = {Theoretical Analysis of the Basis of Collagen Stability},
  note = {University of Wisconsin, Department of Biochemistry},
  howpublished = {Senior Undergraduate Thesis},
  year = {2000},
  month = may,
  pdf = {http://www.waddellinformatics.com/download/waddell00theoretical2.pdf},
  ps = {http://www.waddellinformatics.com/download/waddell00theoretical2.ps.gz},
  keywords = {collagen, nbo, proline}
}
% Poster presented at the International Conference on Intelligent Systems for Molecular Biology, August 2002.
@MISC{waddell02comparative,
  author = {Waddell, Michael and Page, David and Zhan, Fenghuang
            and Barlogie, Bart and Shaughnessy, Jr., John and Hardin, Johanna
            and Cussens, James},
  title = {Comparative Data Mining for Microarrays: A Case Study Based on
           Multiple Myeloma},
  note = {International Conference on Intelligent Systems for Molecular
          Biology, Poster Session 1, Edmonton, Alberta, Canada, August 4,
          2002},
  howpublished = {Poster},
  year = {2002},
  month = aug,
  abstract = {
    These studies compare SVMs, Bayesian networks, decision trees, boosted
    decision trees and voting (ensembles of decision stumps) on a new
    microarray data set for cancer (multiple myeloma) with over 100 samples.
    They provide evidence for several important lessons about how these
    techniques should be used for mining microarray data.},
  pdf = {http://www.waddellinformatics.com/download/waddell02comparative.pdf},
  ps = {http://www.waddellinformatics.com/download/waddell02comparative.ps.gz},
  keywords = {genomics, microarray, myeloma, algorithms}
}
% Poster from the Abbott Laboratories Science Intern Poster Session, July 2003.
@MISC{waddell03seldi,
  author = {Waddell, Michael J.},
  title = {{SELDI Filter}: Automating the Filtering and Analysis of
           Proteomic Mass Spectrometry Data},
  note = {Abbott Laboratories Science Intern Poster Session, Abbott Park,
           Illinois, July 23, 2003},
  howpublished = {Poster},
  year = {2003},
  month = jul,
  abstract = {
    One of the technologies being used at Abbott for discovering biomarkers
    for disease states, toxicity and treatment is Surface Enhanced Laser
    Desorption and Ionization (SELDI) combined with time of flight (TOF)
    mass spectrometry.  SELDI combines highly specific sample enrichment
    with a sensitive mass measurement, thereby allowing researchers to
    discover very low abundant (down to 10 fmol) biomarkers.  However, the
    volume of data obtained with SELDI can quickly overwhelm the researcher's
    ability to analyze manually. \\
    \\
    SELDI Filter is a program that I have developed during my internship at
    Abbott to automate much of this tedious analysis process.  It integrates
    with the Ciphergen ProteinChip\copyright{} software that the researchers
    are using to collect the SELDI data and presents its analysis results in
    either Microsoft Word\copyright{} or Microsoft PowerPoint\copyright{} for
    ease of integration into papers and presentations. \\
    \\
    SELDI Filter currently automates the following analysis methods: ANOVA,
    Discriminant Analysis and Partition Analysis.  However, due to its modular
    design, other techniques can quickly and easily be added.},
  pdf = {http://www.waddellinformatics.com/pubs/waddell03seldi.pdf},
  ps = {http://www.waddellinformatics.com/pubs/waddell03seldi.ps.gz},
  summary = {http://www.waddellinformatics.com/download/waddell03seldi-summary.txt},
  keywords = {proteomics, seldi, spectrometry}
}
% Seminar talk in the CIBM Seminar Series, February 2003.
@MISC{waddell03toward,
  author = {Michael J. Waddell},
  title = {Toward the Development of Diagnostic Models Capable of
           Distinguishing Multiple Myeloma, {MGUS}, and Normal Plasma Cells
           Using Global Gene Expression Profiles},
  note = {Computation and Informatics in Biology and Medicine (CIBM)
          Seminar Series, University of Wisconsin-Madison, Madison, Wisconsin,
          February 11, 2003},
  howpublished = {Seminar},
  year = {2003},
  month = feb,
  abstract = {
     Standard laboratory classification of the plasma cell dyscrasia monoclonal
     gammopathy of undetermined significance (MGUS) and the overt plasma cell
     neoplasm multiple myeloma (MM) is quite accurate, yet, for the most part,
     prognostically uninformative.   Most, if not all, cancers are caused by
     inherited or acquired genetic mutations that manifest themselves in
     altered gene expression patterns in the clonally related cancer cells.
     Microarray technology allows for qualitative and quantitative measurements
     of the expression levels of thousands of genes simultaneously, and it has
     now been used both to classify cancers that are morphologically
     indistinguishable and to predict response to therapy. However, standard
     data analysis techniques are not trivial to employ on these large data
     sets. We report on the application of a panel of statistical and data
     mining methodologies to classify groups of samples based on expression
     of 12,000 genes derived from a high density oligonucleotide microarray
     analysis of highly purified plasma cells from newly diagnosed MM, MGUS,
     and normal healthy donors and the prediction errors for each of the models
     and each of the methods.  Additionally, we report ROC curves for the
     comparisons of MM versus MGUS and results on predicting MGUS from a model
     that distinguishes MM samples from normal samples.},
  pdf = {http://www.waddellinformatics.com/download/waddell03toward.pdf},
  ps = {http://www.waddellinformatics.com/download/waddell03toward.ps.gz},
  keywords = {genomics, microarray, myeloma}
}
% Seminar talk at the National Library of Medicine Training Directors' Meeting, July 2003.
@MISC{waddell03predicting,
  author = {Michael J. Waddell},
  title = {Predicting Cancer Susceptibility from Single-Nucleotide
           Polymorphism Data: A Case Study in Multiple Myeloma},
  note = {National Library of Medicine Training Directors' Meeting 2003,
          Bethesda, Maryland, July 9, 2003},
  howpublished = {Seminar},
  year = {2003},
  month = jul,
  pdf = {http://www.waddellinformatics.com/download/waddell03predicting.pdf},
  ps = {http://www.waddellinformatics.com/download/waddell03predicting.ps.gz},
  keywords = {snps, genomics, myeloma}
}
% Seminar talk in the CIBM Seminar Series, March 2004.
@MISC{waddell04modeling,
  author = {Michael J. Waddell},
  title = {Modeling Patterns in Single-Nucleotide Polymorphism Data for
           Predicting Cancer Susceptibility: A Case Study in Multiple Myeloma},
  note = {Computation and Informatics in Biology and Medicine (CIBM)
          Seminar Series, University of Wisconsin-Madison, Madison, Wisconsin,
          March 2, 2004},
  howpublished = {Seminar},
  year = {2004},
  month = mar,
  abstract = {
    The past two decades have witnessed the identification of genes responsible
    for a number of inherited human disorders.  However, most of these
    successes were with disorders that are caused by single genes.
    Attempts to identify groups of genes that result in inherited disorders
    through their collective action have been largely unsuccessful.
    One reason for this difficulty is that standard genetics techniques
    are more sensitive to large, consistent changes in single genes
    than to consistent patterns of small changes in groups of genes. \\
    \\
    In order to find the groups of genes that result in these more complex
    disorders, researchers need to identify groups of genes whose collective
    action is consistent, even though individual genes may not be.
    We propose that standard machine learning algorithms can be utilized
    to address this goal.  In this seminar, I will discuss using support
    vector machines (SVMs) to model patterns in single nucleotide polymorphisms
    (SNPs) that are associated with early versus late onset of a particularly
    deadly form of cancer, Multiple Myeloma.  The goal of building accurate
    models is not only to assess risk, but to provide insight into the
    disease and potentially offer novel drug targets.},
  pdf = {http://www.waddellinformatics.com/download/waddell04modeling.pdf},
  ps = {http://www.waddellinformatics.com/download/waddell04modeling.ps.gz},
  keywords = {genomics, snps, myeloma}
}
% Poster from the CIBM Training Program Retreat, October 2004.
@MISC{waddell04validating,
  author = {Waddell, Michael J.},
  title = {Validating the Effectiveness of Machine Learning Assistance},
  note = {Computation and Informatics in Biology and Medicine (CIBM) Training
           Program Retreat, University of Wisconsin-Madison, Madison,
           Wisconsin, October 15, 2004},
  howpublished = {Poster},
  year = {2004},
  month = oct,
  abstract = {
    This poster describes the results of the first part of a two-part study
    to measure the effectiveness of providing users with learned hypotheses
    when performing a learning task.  The first part of the study uses a
    simple domain based on the ``East-West Challenge.''  In this study,
    subjects are given a set of pictures of cartoon trains where half are
    labeled ``eastbound'' and half are labeled ``westbound.''  The subjects'
    task is to correctly label a second, unlabeled set of trains using the
    hypothesis they learned from the first set.  Some subjects will receive
    the output of a relational learning system to aid them in this task.  In
    the second part of this study, we will conduct a similar task using mass
    spectrometry data.  These preliminary studies will lay the groundwork for
    the validation of other types of collaborative machine learning systems.
    This type of validation is not routinely done when working with machine
    learning systems, but instead it is assumed that any help a system can
    provide is beneficial.  This study will challenge that assumption and
    propose instead that the type of assistance given by a system is as
    important as the quality of its learned hypotheses.},
  pdf = {http://www.waddellinformatics.com/pubs/waddell04validating.pdf},
  ps = {http://www.waddellinformatics.com/pubs/waddell04validating.ps.gz},
  keywords = {collaboration, validating, algorithms}
}
% Journal article in Statistical Applications in Genetics and Molecular Biology, vol. 3, no. 1 (2004).
@ARTICLE{hardin04evaluation,
  title = {Evaluation of Multiple Models to Distinguish Closely Related Forms
           of Disease Using {DNA} Microarray Data},
  author = {Johanna Hardin and Michael Waddell and Page, C. David and
            Fenghuang Zhan and Bart Barlogie and Shaughnessy, Jr., John and
            John Crowley},
  journal = {Statistical Applications in Genetics and Molecular Biology},
  volume = {3},
  number = {1},
  month = jun,
  year = {2004},
  abstract = {
     Motivation: Standard laboratory classification of the plasma cell
     dyscrasia monoclonal gammopathy of undetermined significance (MGUS) and
     the overt plasma cell neoplasm multiple myeloma (MM) is quite accurate,
     yet, for the most part, biologically uninformative. Most, if not all,
     cancers are caused by inherited or acquired genetic mutations that
     manifest themselves in altered gene expression patterns in the clonally
     related cancer cells. Microarray technology allows for qualitative and
     quantitative measurements of the expression levels of thousands of genes
     simultaneously, and it has now been used both to classify cancers that
     are morphologically indistinguishable and to predict response to therapy.
     It is anticipated that this information can also be used to develop
     molecular diagnostic models and to provide insight into mechanisms of
     disease progression, e.g., transition from healthy to benign hyperplasia
     or conversion of a benign hyperplasia to overt malignancy. However,
     standard data analysis techniques are not trivial to employ on these
     large data sets. Methodology designed to handle large data sets (or
     modified to do so) is needed to access the vital information contained in
     the genetic samples, which in turn can be used to develop more robust and
     accurate methods of clinical diagnostics and prognostics. \\
     \\
     Results: Here we report on the application of a panel of statistical and
     data mining methodologies to classify groups of samples based on
     expression of 12,000 genes derived from a high density oligonucleotide
     microarray analysis of highly purified plasma cells from newly diagnosed
     MM, MGUS, and normal healthy donors. The three groups of samples are each
     tested against each other. The methods are found to be similar in their
     ability to predict group membership; all do quite well at predicting MM
     vs. normal and MGUS vs. normal. However, no method appears to be able to
     distinguish explicitly the genetic mechanisms between MM and MGUS. We
     believe this might be due to the lack of genetic differences between
     these two conditions, and may not be due to the failure of the models. We
     report the prediction errors for each of the models and each of the
     methods. Additionally, we report ROC curves for the comparisons and
     results on predicting MGUS from a model that distinguishes MM samples
     from normal samples.},
  url = {http://www.bepress.com/sagmb/vol3/iss1/art10/},
  pdf = {http://www.waddellinformatics.com/download/hardin04evaluation.pdf},
  ps = {http://www.waddellinformatics.com/download/hardin04evaluation.ps.gz},
  keywords = {myeloma, genomics, microarray, algorithms}
}
% Paper in the Proceedings of BIOKDD '05 on predicting cancer susceptibility from SNP data.
@INPROCEEDINGS{waddell05predicting,
  author = {Michael Waddell and David Page and
            Fenghuang Zhan and Bart Barlogie and Shaughnessy, Jr., John},
  title = {Predicting Cancer Susceptibility from Single-Nucleotide Polymorphism
           Data: A Case Study in Multiple Myeloma},
  booktitle = {Proceedings of {BIOKDD} '05, Chicago, Illinois, August 2005},
  month = aug,
  year = {2005},
  abstract = {
    This paper asks whether susceptibility to early-onset (diagnosis before
    age 40) of a particularly deadly form of cancer, Multiple My\-elo\-ma,
    can be predicted from single-nucleotide polymorphism (SNP) profiles with
    an accuracy greater than chance.  Specifically, given SNP profiles for 80
    Multiple My\-elo\-ma patients -- of which we believe 40 to have high
    susceptibility and 40 to have lower susceptibility -- we train a support
    vector machine (SVM) to predict age at diagnosis.  We chose SVMs for this
    task because they are well suited to deal with interactions among features
    and redundant features.  The accuracy of the trained SVM estimated by
    leave-one-out cross-validation is 71\%, significantly greater than random
    guessing.  This result is particularly encouraging since only 3000 SNPs
    were used in profiling, whereas several million SNPs are known.},
  pdf = {http://www.waddellinformatics.com/download/waddell05predicting.pdf},
  ps = {http://www.waddellinformatics.com/download/waddell05predicting.ps.gz},
  data = {http://www.waddellinformatics.com/download/waddell05predicting.zip}
}
% Seminar presentation at BIOKDD '05, August 2005.
@MISC{waddell05predicting2,
  author = {Michael Waddell and David Page and
            Fenghuang Zhan and Bart Barlogie and Shaughnessy, Jr., John},
  title = {Predicting Cancer Susceptibility from Single-Nucleotide Polymorphism
           Data: A Case Study in Multiple Myeloma},
  note = {{BIOKDD} '05, Chicago, Illinois, August 21, 2005},
  howpublished = {Seminar},
  month = aug,
  year = {2005},
  abstract = {
    This paper asks whether susceptibility to early-onset (diagnosis before
    age 40) of a particularly deadly form of cancer, Multiple My\-elo\-ma,
    can be predicted from single-nucleotide polymorphism (SNP) profiles with
    an accuracy greater than chance.  Specifically, given SNP profiles for 80
    Multiple My\-elo\-ma patients -- of which we believe 40 to have high
    susceptibility and 40 to have lower susceptibility -- we train a support
    vector machine (SVM) to predict age at diagnosis.  We chose SVMs for this
    task because they are well suited to deal with interactions among features
    and redundant features.  The accuracy of the trained SVM estimated by
    leave-one-out cross-validation is 71\%, significantly greater than random
    guessing.  This result is particularly encouraging since only 3000 SNPs
    were used in profiling, whereas several million SNPs are known.},
  pdf = {http://www.waddellinformatics.com/download/waddell05predicting2.pdf},
  ps = {http://www.waddellinformatics.com/download/waddell05predicting2.ps.gz}
}