<html xmlns:v="urn:schemas-microsoft-com:vml"
xmlns:o="urn:schemas-microsoft-com:office:office"
xmlns:w="urn:schemas-microsoft-com:office:word"
xmlns:m="http://schemas.microsoft.com/office/2004/12/omml"
xmlns:mv="http://macVmlSchemaUri" xmlns="http://www.w3.org/TR/REC-html40">

<head>
<title>CS 478 Tool Kit documentation</title>
<meta name=Title content="">
<meta name=Keywords content="">
<meta http-equiv=Content-Type content="text/html; charset=unicode">
<meta name=ProgId content=Word.Document>
<meta name=Generator content="Microsoft Word 14">
<meta name=Originator content="Microsoft Word 14">
<link rel=File-List href="Toolkit_files/filelist.xml">
<!--[if gte mso 9]><xml>
 <o:DocumentProperties>
  <o:Author>Tony Martinez</o:Author>
  <o:LastAuthor>Tony Martinez</o:LastAuthor>
  <o:Revision>10</o:Revision>
  <o:TotalTime>42</o:TotalTime>
  <o:Created>2010-01-13T22:37:00Z</o:Created>
  <o:LastSaved>2016-01-07T20:05:00Z</o:LastSaved>
  <o:Pages>2</o:Pages>
  <o:Words>885</o:Words>
  <o:Characters>5051</o:Characters>
  <o:Company>Brigham Young University</o:Company>
  <o:Lines>42</o:Lines>
  <o:Paragraphs>11</o:Paragraphs>
  <o:CharactersWithSpaces>5925</o:CharactersWithSpaces>
  <o:Version>14.0</o:Version>
 </o:DocumentProperties>
 <o:OfficeDocumentSettings>
  <o:AllowPNG/>
  <o:PixelsPerInch>120</o:PixelsPerInch>
 </o:OfficeDocumentSettings>
</xml><![endif]-->
<link rel=themeData href="Toolkit_files/themedata.xml">
<!--[if gte mso 9]><xml>
 <w:WordDocument>
  <w:Zoom>150</w:Zoom>
  <w:SpellingState>Clean</w:SpellingState>
  <w:GrammarState>Clean</w:GrammarState>
  <w:TrackMoves>false</w:TrackMoves>
  <w:TrackFormatting/>
  <w:ValidateAgainstSchemas/>
  <w:SaveIfXMLInvalid>false</w:SaveIfXMLInvalid>
  <w:IgnoreMixedContent>false</w:IgnoreMixedContent>
  <w:AlwaysShowPlaceholderText>false</w:AlwaysShowPlaceholderText>
  <w:DoNotPromoteQF/>
  <w:LidThemeOther>EN-US</w:LidThemeOther>
  <w:LidThemeAsian>X-NONE</w:LidThemeAsian>
  <w:LidThemeComplexScript>X-NONE</w:LidThemeComplexScript>
  <w:Compatibility>
   <w:SplitPgBreakAndParaMark/>
   <w:UseFELayout/>
  </w:Compatibility>
  <m:mathPr>
   <m:mathFont m:val="Cambria Math"/>
   <m:brkBin m:val="before"/>
   <m:brkBinSub m:val="&#45;-"/>
   <m:smallFrac m:val="off"/>
   <m:dispDef/>
   <m:lMargin m:val="0"/>
   <m:rMargin m:val="0"/>
   <m:defJc m:val="centerGroup"/>
   <m:wrapIndent m:val="1440"/>
   <m:intLim m:val="subSup"/>
   <m:naryLim m:val="undOvr"/>
  </m:mathPr></w:WordDocument>
</xml><![endif]--><!--[if gte mso 9]><xml>
 <w:LatentStyles DefLockedState="false" DefUnhideWhenUsed="false"
  DefSemiHidden="false" DefQFormat="false" LatentStyleCount="276">
  <w:LsdException Locked="false" QFormat="true" Name="Normal"/>
  <w:LsdException Locked="false" QFormat="true" Name="heading 1"/>
  <w:LsdException Locked="false" SemiHidden="true" UnhideWhenUsed="true"
   QFormat="true" Name="heading 2"/>
  <w:LsdException Locked="false" SemiHidden="true" UnhideWhenUsed="true"
   QFormat="true" Name="heading 3"/>
  <w:LsdException Locked="false" SemiHidden="true" UnhideWhenUsed="true"
   QFormat="true" Name="heading 4"/>
  <w:LsdException Locked="false" SemiHidden="true" UnhideWhenUsed="true"
   QFormat="true" Name="heading 5"/>
  <w:LsdException Locked="false" SemiHidden="true" UnhideWhenUsed="true"
   QFormat="true" Name="heading 6"/>
  <w:LsdException Locked="false" SemiHidden="true" UnhideWhenUsed="true"
   QFormat="true" Name="heading 7"/>
  <w:LsdException Locked="false" SemiHidden="true" UnhideWhenUsed="true"
   QFormat="true" Name="heading 8"/>
  <w:LsdException Locked="false" SemiHidden="true" UnhideWhenUsed="true"
   QFormat="true" Name="heading 9"/>
  <w:LsdException Locked="false" SemiHidden="true" UnhideWhenUsed="true"
   QFormat="true" Name="caption"/>
  <w:LsdException Locked="false" QFormat="true" Name="Title"/>
  <w:LsdException Locked="false" Priority="1" Name="Default Paragraph Font"/>
  <w:LsdException Locked="false" QFormat="true" Name="Subtitle"/>
  <w:LsdException Locked="false" QFormat="true" Name="Strong"/>
  <w:LsdException Locked="false" QFormat="true" Name="Emphasis"/>
  <w:LsdException Locked="false" Priority="99" Name="No List"/>
  <w:LsdException Locked="false" QFormat="true" Name="No Spacing"/>
  <w:LsdException Locked="false" QFormat="true" Name="List Paragraph"/>
  <w:LsdException Locked="false" QFormat="true" Name="Quote"/>
  <w:LsdException Locked="false" QFormat="true" Name="Intense Quote"/>
  <w:LsdException Locked="false" QFormat="true" Name="Subtle Emphasis"/>
  <w:LsdException Locked="false" QFormat="true" Name="Intense Emphasis"/>
  <w:LsdException Locked="false" QFormat="true" Name="Subtle Reference"/>
  <w:LsdException Locked="false" QFormat="true" Name="Intense Reference"/>
  <w:LsdException Locked="false" QFormat="true" Name="Book Title"/>
  <w:LsdException Locked="false" SemiHidden="true" UnhideWhenUsed="true"
   QFormat="true" Name="TOC Heading"/>
 </w:LatentStyles>
</xml><![endif]-->
<style>
<!--
 /* Font Definitions */
@font-face
	{font-family:Times;
	panose-1:2 0 5 0 0 0 0 0 0 0;
	mso-font-charset:0;
	mso-generic-font-family:auto;
	mso-font-pitch:variable;
	mso-font-signature:3 0 0 0 1 0;}
@font-face
	{font-family:"ＭＳ 明朝";
	mso-font-charset:78;
	mso-generic-font-family:auto;
	mso-font-pitch:variable;
	mso-font-signature:-536870145 1791491579 18 0 131231 0;}
@font-face
	{font-family:"ＭＳ 明朝";
	mso-font-charset:78;
	mso-generic-font-family:auto;
	mso-font-pitch:variable;
	mso-font-signature:-536870145 1791491579 18 0 131231 0;}
 /* Style Definitions */
p.MsoNormal, li.MsoNormal, div.MsoNormal
	{mso-style-unhide:no;
	mso-style-qformat:yes;
	mso-style-parent:"";
	margin:0in;
	margin-bottom:.0001pt;
	mso-pagination:widow-orphan;
	font-size:10.0pt;
	font-family:Times;
	mso-fareast-font-family:"ＭＳ 明朝";
	mso-fareast-theme-font:minor-fareast;
	mso-bidi-font-family:"Times New Roman";
	mso-bidi-theme-font:minor-bidi;}
h1
	{mso-style-unhide:no;
	mso-style-qformat:yes;
	mso-style-link:"Heading 1 Char";
	margin:0in;
	margin-bottom:.0001pt;
	mso-pagination:widow-orphan;
	mso-outline-level:1;
	font-size:24.0pt;
	font-family:Times;
	mso-fareast-font-family:"ＭＳ 明朝";
	mso-fareast-theme-font:minor-fareast;
	mso-bidi-font-family:"Times New Roman";
	mso-bidi-theme-font:minor-bidi;}
a:link, span.MsoHyperlink
	{mso-style-unhide:no;
	color:blue;
	text-decoration:underline;
	text-underline:single;}
a:visited, span.MsoHyperlinkFollowed
	{mso-style-unhide:no;
	color:blue;
	text-decoration:underline;
	text-underline:single;}
span.Heading1Char
	{mso-style-name:"Heading 1 Char";
	mso-style-unhide:no;
	mso-style-locked:yes;
	mso-style-link:"Heading 1";
	mso-ansi-font-size:16.0pt;
	mso-bidi-font-size:16.0pt;
	font-family:Calibri;
	mso-ascii-font-family:Calibri;
	mso-ascii-theme-font:major-latin;
	mso-fareast-font-family:"ＭＳ ゴシック";
	mso-fareast-theme-font:major-fareast;
	mso-hansi-font-family:Calibri;
	mso-hansi-theme-font:major-latin;
	mso-bidi-font-family:"Times New Roman";
	mso-bidi-theme-font:major-bidi;
	color:#345A8A;
	font-weight:bold;}
span.SpellE
	{mso-style-name:"";
	mso-spl-e:yes;}
span.GramE
	{mso-style-name:"";
	mso-gram-e:yes;}
.MsoChpDefault
	{mso-style-type:export-only;
	mso-default-props:yes;
	font-size:10.0pt;
	mso-ansi-font-size:10.0pt;
	mso-bidi-font-size:10.0pt;}
@page WordSection1
	{size:8.5in 11.0in;
	margin:1.0in 1.25in 1.0in 1.25in;
	mso-header-margin:.5in;
	mso-footer-margin:.5in;
	mso-paper-source:0;}
div.WordSection1
	{page:WordSection1;}
 /* List Definitions */
@list l0
	{mso-list-id:130905540;
	mso-list-template-ids:2100305924;}
@list l0:level1
	{mso-level-tab-stop:.5in;
	mso-level-number-position:left;
	text-indent:-.25in;}
@list l0:level2
	{mso-level-tab-stop:1.0in;
	mso-level-number-position:left;
	text-indent:-.25in;}
@list l0:level3
	{mso-level-tab-stop:1.5in;
	mso-level-number-position:left;
	text-indent:-.25in;}
@list l0:level4
	{mso-level-tab-stop:2.0in;
	mso-level-number-position:left;
	text-indent:-.25in;}
@list l0:level5
	{mso-level-tab-stop:2.5in;
	mso-level-number-position:left;
	text-indent:-.25in;}
@list l0:level6
	{mso-level-tab-stop:3.0in;
	mso-level-number-position:left;
	text-indent:-.25in;}
@list l0:level7
	{mso-level-tab-stop:3.5in;
	mso-level-number-position:left;
	text-indent:-.25in;}
@list l0:level8
	{mso-level-tab-stop:4.0in;
	mso-level-number-position:left;
	text-indent:-.25in;}
@list l0:level9
	{mso-level-tab-stop:4.5in;
	mso-level-number-position:left;
	text-indent:-.25in;}
@list l1
	{mso-list-id:224142130;
	mso-list-template-ids:-358716760;}
@list l1:level1
	{mso-level-number-format:bullet;
	mso-level-text:;
	mso-level-tab-stop:.5in;
	mso-level-number-position:left;
	text-indent:-.25in;
	mso-ansi-font-size:10.0pt;
	font-family:Symbol;}
@list l1:level2
	{mso-level-tab-stop:1.0in;
	mso-level-number-position:left;
	text-indent:-.25in;}
@list l1:level3
	{mso-level-tab-stop:1.5in;
	mso-level-number-position:left;
	text-indent:-.25in;}
@list l1:level4
	{mso-level-tab-stop:2.0in;
	mso-level-number-position:left;
	text-indent:-.25in;}
@list l1:level5
	{mso-level-tab-stop:2.5in;
	mso-level-number-position:left;
	text-indent:-.25in;}
@list l1:level6
	{mso-level-tab-stop:3.0in;
	mso-level-number-position:left;
	text-indent:-.25in;}
@list l1:level7
	{mso-level-tab-stop:3.5in;
	mso-level-number-position:left;
	text-indent:-.25in;}
@list l1:level8
	{mso-level-tab-stop:4.0in;
	mso-level-number-position:left;
	text-indent:-.25in;}
@list l1:level9
	{mso-level-tab-stop:4.5in;
	mso-level-number-position:left;
	text-indent:-.25in;}
@list l2
	{mso-list-id:460541317;
	mso-list-template-ids:2087344860;}
@list l2:level1
	{mso-level-number-format:bullet;
	mso-level-text:;
	mso-level-tab-stop:.5in;
	mso-level-number-position:left;
	text-indent:-.25in;
	mso-ansi-font-size:10.0pt;
	font-family:Symbol;}
@list l2:level2
	{mso-level-tab-stop:1.0in;
	mso-level-number-position:left;
	text-indent:-.25in;}
@list l2:level3
	{mso-level-tab-stop:1.5in;
	mso-level-number-position:left;
	text-indent:-.25in;}
@list l2:level4
	{mso-level-tab-stop:2.0in;
	mso-level-number-position:left;
	text-indent:-.25in;}
@list l2:level5
	{mso-level-tab-stop:2.5in;
	mso-level-number-position:left;
	text-indent:-.25in;}
@list l2:level6
	{mso-level-tab-stop:3.0in;
	mso-level-number-position:left;
	text-indent:-.25in;}
@list l2:level7
	{mso-level-tab-stop:3.5in;
	mso-level-number-position:left;
	text-indent:-.25in;}
@list l2:level8
	{mso-level-tab-stop:4.0in;
	mso-level-number-position:left;
	text-indent:-.25in;}
@list l2:level9
	{mso-level-tab-stop:4.5in;
	mso-level-number-position:left;
	text-indent:-.25in;}
@list l3
	{mso-list-id:1348941182;
	mso-list-template-ids:-808057944;}
@list l3:level1
	{mso-level-number-format:bullet;
	mso-level-text:;
	mso-level-tab-stop:.5in;
	mso-level-number-position:left;
	text-indent:-.25in;
	mso-ansi-font-size:10.0pt;
	font-family:Symbol;}
@list l3:level2
	{mso-level-tab-stop:1.0in;
	mso-level-number-position:left;
	text-indent:-.25in;}
@list l3:level3
	{mso-level-tab-stop:1.5in;
	mso-level-number-position:left;
	text-indent:-.25in;}
@list l3:level4
	{mso-level-tab-stop:2.0in;
	mso-level-number-position:left;
	text-indent:-.25in;}
@list l3:level5
	{mso-level-tab-stop:2.5in;
	mso-level-number-position:left;
	text-indent:-.25in;}
@list l3:level6
	{mso-level-tab-stop:3.0in;
	mso-level-number-position:left;
	text-indent:-.25in;}
@list l3:level7
	{mso-level-tab-stop:3.5in;
	mso-level-number-position:left;
	text-indent:-.25in;}
@list l3:level8
	{mso-level-tab-stop:4.0in;
	mso-level-number-position:left;
	text-indent:-.25in;}
@list l3:level9
	{mso-level-tab-stop:4.5in;
	mso-level-number-position:left;
	text-indent:-.25in;}
ol
	{margin-bottom:0in;}
ul
	{margin-bottom:0in;}
-->
</style>
<!--[if gte mso 10]>
<style>
 /* Style Definitions */
table.MsoNormalTable
	{mso-style-name:"Table Normal";
	mso-tstyle-rowband-size:0;
	mso-tstyle-colband-size:0;
	mso-style-noshow:yes;
	mso-style-priority:99;
	mso-style-parent:"";
	mso-padding-alt:0in 5.4pt 0in 5.4pt;
	mso-para-margin:0in;
	mso-para-margin-bottom:.0001pt;
	mso-pagination:widow-orphan;
	font-size:10.0pt;
	font-family:"Times New Roman";}
</style>
<![endif]--><!--[if gte mso 9]><xml>
 <o:shapedefaults v:ext="edit" spidmax="1027"/>
</xml><![endif]--><!--[if gte mso 9]><xml>
 <o:shapelayout v:ext="edit">
  <o:idmap v:ext="edit" data="1"/>
 </o:shapelayout></xml><![endif]-->
</head>

<body bgcolor=white lang=EN-US link=blue vlink=blue style='tab-interval:.5in'>

<div class=WordSection1>

<h1 align=center style='text-align:center'><span style='mso-fareast-font-family:
"Times New Roman";mso-bidi-font-family:"Times New Roman"'>CS 478 Tool Kit
documentation<o:p></o:p></span></h1>

<p class=MsoNormal><span style='font-size:12.0pt;font-family:"Times New Roman"'><o:p>&nbsp;</o:p></span></p>

<p class=MsoNormal><span style='font-size:12.0pt;font-family:"Times New Roman"'>We
will always use ARFF files for our datasets, and we will make the assumption
that all data will fit in RAM. Details on ARFF are found </span><a
href="http://axon.cs.byu.edu/%7Emartinez/classes/478/stuff/arff.html"><span
style='font-size:12.0pt;font-family:"Times New Roman";color:#0000EE'>here</span></a><span
style='font-size:12.0pt;font-family:"Times New Roman"'>.&nbsp;&nbsp; A
collection of data sets already in the ARFF format can be found </span><a
href="http://axon.cs.byu.edu/data/"><span style='font-size:12.0pt;font-family:
"Times New Roman"'>here</span></a><span style='font-size:12.0pt;font-family:
"Times New Roman"'>.<o:p></o:p></span></p>

<p class=MsoNormal><span style='font-size:12.0pt;font-family:"Times New Roman"'><o:p>&nbsp;</o:p></span></p>

<p class=MsoNormal><span style='font-size:12.0pt'>A basic tool kit is provided
in C++, Java, and Python to help you get started implementing learning algorithms. You
are also welcome to code up your own toolkit or modify the source code made
available here however you want.<span style="mso-spacerun:yes">  </span>If you
do so, here are a few things to keep in mind: <o:p></o:p></span></p>

<p class=MsoNormal><span style='font-size:12.0pt'><o:p>&nbsp;</o:p></span></p>

<ul style='margin-top:0in' type=disc>
 <li class=MsoNormal style='mso-list:l1 level1 lfo1;tab-stops:list .5in'><span
     style='font-size:12.0pt'>Some of the labs require a high number of
     computations. Using an interpreted language may result in longer runtimes.
     (The TAs are not sure exactly how much longer, since we have never tried
     any of the labs in an interpreted language. Just be forewarned.) <o:p></o:p></span></li>
 <li class=MsoNormal style='mso-list:l1 level1 lfo1;tab-stops:list .5in'><span
     style='font-size:12.0pt'>Advice about dealing with discrete-valued data: A
     common way to represent each &quot;instance&quot; (a.k.a.
     &quot;pattern&quot;) is to use a vector of numbers. For discrete values,
     these numbers could be an index to the &quot;name&quot; of that value in
     the metadata. <o:p></o:p></span></li>
</ul>

<p class=MsoNormal style='margin-left:.5in'><span style='font-size:12.0pt'><o:p>&nbsp;</o:p></span></p>

<p class=MsoNormal><span style='font-size:12.0pt'>The CS 478 tool kit is
intended as a starting place for working with machine learning algorithms. It
provides the following functionality to run your algorithms: <o:p></o:p></span></p>

<p class=MsoNormal><span style='font-size:12.0pt'><o:p>&nbsp;</o:p></span></p>

<ul style='margin-top:0in' type=disc>
 <li class=MsoNormal style='mso-list:l2 level1 lfo2;tab-stops:list .5in'><span
     style='font-size:12.0pt'>Parses and stores the ARFF file <o:p></o:p></span></li>
 <li class=MsoNormal style='mso-list:l2 level1 lfo2;tab-stops:list .5in'><span
     style='font-size:12.0pt'>Randomizes the instances in the ARFF file <o:p></o:p></span></li>
 <li class=MsoNormal style='mso-list:l2 level1 lfo2;tab-stops:list .5in'><span
     style='font-size:12.0pt'>Provides four evaluation methods</span><span
     style='font-size:12.0pt;font-family:"Times New Roman"'> (A more detailed
     description of these methods is found </span><a
     href="http://axon.cs.byu.edu/%7Emartinez/classes/478/stuff/cs478.accuracy.html"><span
     style='font-size:12.0pt;font-family:"Times New Roman"'>here</span></a><span
     style='font-size:12.0pt;font-family:"Times New Roman"'>)</span><span
     style='font-size:12.0pt'>: <o:p></o:p></span></li>
 <ol style='margin-top:0in' start=1 type=1>
  <li class=MsoNormal style='mso-list:l2 level2 lfo2;tab-stops:list 1.0in'><span
      style='font-size:12.0pt'>Training set method:<br>
      The model is evaluated on the same data set that was used for training <o:p></o:p></span></li>
  <li class=MsoNormal style='mso-list:l2 level2 lfo2;tab-stops:list 1.0in'><span
      style='font-size:12.0pt'>Static split test set method:<br>
      Two distinct data sets are made available to the learning algorithm; one
      for training and one for testing <o:p></o:p></span></li>
  <li class=MsoNormal style='mso-list:l2 level2 lfo2;tab-stops:list 1.0in'><span
      style='font-size:12.0pt'>Random split test set method:<br>
      A single data set is made available to the learning algorithm and the
      data set is split such that <i style='mso-bidi-font-style:normal'>x</i>%
      of the instances are randomly selected for training and the remainder are
      used for testing, where you supply the value of <i style='mso-bidi-font-style:
      normal'>x</i>.<o:p></o:p></span></li>
  <li class=MsoNormal style='mso-list:l2 level2 lfo2;tab-stops:list 1.0in'><i
      style='mso-bidi-font-style:normal'><span style='font-size:12.0pt'>N</span></i><span
      style='font-size:12.0pt'>-fold cross-validation method:<br>
      <span class=GramE>A</span> single data set is made available to the
      learning algorithm which is partitioned into <i style='mso-bidi-font-style:
      normal'>N</i> equally sized subsets.<span style="mso-spacerun:yes"> 
      </span>Each subset is used once for evaluating the learning algorithm
      while the remaining instances are used for training.<span
      style="mso-spacerun:yes">  </span>The results of the <i style='mso-bidi-font-style:
      normal'>N</i> runs are then averaged to provide the final accuracy
      estimate. <o:p></o:p></span></li>
 </ol>
 <li class=MsoNormal style='mso-list:l2 level1 lfo2;tab-stops:list .5in'><span
     style='font-size:12.0pt'>Parse command-line arguments <o:p></o:p></span></li>
 <li class=MsoNormal style='mso-list:l2 level1 lfo2;tab-stops:list .5in'><span
     style='font-size:12.0pt'>Normalize attributes <o:p></o:p></span></li>
</ul>

<p class=MsoNormal><span style='font-size:12.0pt'><o:p>&nbsp;</o:p></span></p>

<p class=MsoNormal><b style='mso-bidi-font-weight:normal'><span
style='font-size:12.0pt'>Build Instructions for the Java version:</span></b><span
style='font-size:12.0pt'><br>
Download the zip file <a
href="http://axon.cs.byu.edu/~martinez/classes/478/stuff/toolkitJava.zip">here</a><o:p></o:p></span></p>

<p class=MsoNormal><span style='font-size:12.0pt'>Build Instructions for Linux:<o:p></o:p></span></p>

<p class=MsoNormal><span style='font-size:12.0pt'>&nbsp;&nbsp; 1. Unzip the zip
file<o:p></o:p></span></p>

<p class=MsoNormal><span style='font-size:12.0pt'>&nbsp;&nbsp; 2. <span
class=SpellE><span class=GramE>javac</span></span> *.java<o:p></o:p></span></p>

<p class=MsoNormal><span style='font-size:12.0pt'><br>
(We do not use Windows, but there is nothing fancy and I imagine that it should
work in Microsoft Visual C++. You will need to create a project solution. If
you use Windows and have trouble, come see us for help.)<o:p></o:p></span></p>

<p class=MsoNormal><span style='font-size:12.0pt'><o:p>&nbsp;</o:p></span></p>

<p class=MsoNormal><b><span style='font-size:12.0pt'>Build Instructions for the
C++ version:</span></b><span style='font-size:12.0pt'><o:p></o:p></span></p>

<p class=MsoNormal><span style='font-size:12.0pt'>Open the terminal <o:p></o:p></span></p>

<p class=MsoNormal><span class=SpellE><span class=GramE><span style='font-size:
12.0pt'>wget</span></span></span><span style='font-size:12.0pt'> <a
href="http://axon.cs.byu.edu/~martinez/classes/478/stuff/toolkitc.zip">http://axon.cs.byu.edu/~martinez/classes/478/stuff/toolkitc.zip</a><o:p></o:p></span></p>

<p class=MsoNormal><span class=GramE><span style='font-size:12.0pt'>unzip</span></span><span
style='font-size:12.0pt'> toolkitc.zip<o:p></o:p></span></p>

<p class=MsoNormal><span style='font-size:12.0pt'>cd toolkit/<span
class=SpellE>src</span>/<o:p></o:p></span></p>

<p class=MsoNormal><span class=GramE><span style='font-size:12.0pt'>make</span></span><span
style='font-size:12.0pt'> opt<o:p></o:p></span></p>

<p class=MsoNormal><span style='font-size:12.0pt'><o:p>&nbsp;</o:p></span></p>

<p class=MsoNormal><b><span style='font-size:12.0pt'>Test to make sure that the
toolkit works:</span></b><span style='font-size:12.0pt'><o:p></o:p></span></p>

<p class=MsoNormal><span class=SpellE><span class=GramE><span style='font-size:
12.0pt'>mkdir</span></span></span><span style='font-size:12.0pt'> datasets<o:p></o:p></span></p>

<p class=MsoNormal><span style='font-size:12.0pt'>cd datasets/<o:p></o:p></span></p>

<p class=MsoNormal><span class=SpellE><span class=GramE><span style='font-size:
12.0pt'>wget</span></span></span><span style='font-size:12.0pt'> <a
href="http://axon.cs.byu.edu/~martinez/classes/478/stuff/iris.arff">http://axon.cs.byu.edu/~martinez/classes/478/stuff/iris.arff</a><o:p></o:p></span></p>

<p class=MsoNormal><span class=GramE><span style='font-size:12.0pt'>cd ..</span></span><span
style='font-size:12.0pt'><o:p></o:p></span></p>

<p class=MsoNormal><span class=GramE><span style='font-size:12.0pt'>./</span></span><span
style='font-size:12.0pt'>bin/<span class=SpellE>MLSystemManager</span> -L
baseline -A datasets/<span class=SpellE>iris.arff</span> -E training<o:p></o:p></span></p>

<p class=MsoNormal><span style='font-size:12.0pt'><o:p>&nbsp;</o:p></span></p>

<p class=MsoNormal><span style='font-size:12.0pt'>You should see the results
for a baseline classifier (33% accuracy on iris)<br style='mso-special-character:
line-break'>
<![if !supportLineBreakNewLine]><br style='mso-special-character:line-break'>
<![endif]><o:p></o:p></span></p>

<p class=MsoNormal><b style='mso-bidi-font-weight:normal'><span
style='font-size:12.0pt'>Usage Instructions for both versions:<br>
</span></b><span class=SpellE><span style='font-size:12.0pt'>MLSystemManager</span></span><span
style='font-size:12.0pt'> -L [<span class=SpellE>LearningAlgorithm</span>] -A [<span
class=SpellE>ARFF_File</span>] -E [<span class=SpellE>EvaluationMethod</span>]
{[<span class=SpellE>ExtraParameters</span>]} [-N] [-R seed]<br>
<br>
Where the -N will normalize the training and test data sets (Normalization max
and min will come from the training set).<br>
<br>
The -R allows you to pass in a seed for the random number generator. By default
each time you run the code, the data set will be shuffled differently. If you
wish to produce the same shuffle, provide a seed such as 1 or 2.<br>
<br>
Possible evaluation methods are: <o:p></o:p></span></p>

<ul style='margin-top:0in' type=disc>
 <li class=MsoNormal style='mso-list:l3 level1 lfo4;tab-stops:list .5in'><span
     style='font-size:12.0pt'>Training (using same data set for training and
     testing):<o:p></o:p></span></li>
</ul>

<p class=MsoNormal style='margin-left:.5in'><span style='font-size:12.0pt'><br>
<span class=GramE>./</span><span class=SpellE>MLSystemManager</span> -L [<span
class=SpellE>LearningAlgorithm</span>] -A [<span class=SpellE>ARFF_File</span>]
-E training<o:p></o:p></span></p>

<p class=MsoNormal><span style='font-size:12.0pt'><o:p>&nbsp;</o:p></span></p>

<ul style='margin-top:0in' type=disc>
 <li class=MsoNormal style='mso-list:l3 level1 lfo4;tab-stops:list .5in'><span
     style='font-size:12.0pt'>Static Split (2 distinct datasets/ARFF files; one
     for training and one for testing):<o:p></o:p></span></li>
</ul>

<p class=MsoNormal><span style='font-size:12.0pt'><o:p>&nbsp;</o:p></span></p>

<p class=MsoNormal style='margin-left:.5in'><span class=GramE><span
style='font-size:12.0pt'>./</span></span><span class=SpellE><span
style='font-size:12.0pt'>MLSystemManager</span></span><span style='font-size:
12.0pt'> -L [<span class=SpellE>LearningAlgorithm</span>] -A [<span
class=SpellE>ARFF_File</span>] -E static [<span class=SpellE>TestARFF_File</span>]<o:p></o:p></span></p>

<p class=MsoNormal><span style='font-size:12.0pt'><o:p>&nbsp;</o:p></span></p>

<ul style='margin-top:0in' type=disc>
 <li class=MsoNormal style='mso-list:l3 level1 lfo4;tab-stops:list .5in'><span
     style='font-size:12.0pt'>Random Split (1 dataset is split randomly
     providing x% for training and the rest for testing):<o:p></o:p></span></li>
</ul>

<p class=MsoNormal><span style='font-size:12.0pt'><o:p>&nbsp;</o:p></span></p>

<p class=MsoNormal style='margin-left:.5in'><span class=GramE><span
style='font-size:12.0pt'>./</span></span><span class=SpellE><span
style='font-size:12.0pt'>MLSystemManager</span></span><span style='font-size:
12.0pt'> -L [<span class=SpellE>LearningAlgorithm</span>] -A [<span
class=SpellE>ARFF_File</span>] -E random [<span class=SpellE>PercentageForTraining</span>]<o:p></o:p></span></p>

<p class=MsoNormal><span style='font-size:12.0pt'><o:p>&nbsp;</o:p></span></p>

<ul style='margin-top:0in' type=disc>
 <li class=MsoNormal style='mso-list:l3 level1 lfo4;tab-stops:list .5in'><span
     style='font-size:12.0pt'>N-fold Cross-validation (1 dataset is partitioned
     into N partitions. The learning algorithm is evaluated on each portion and
     then the average accuracy is returned):<o:p></o:p></span></li>
</ul>

<p class=MsoNormal style='margin-left:.5in'><span style='font-size:12.0pt'><br>
<span class=GramE>./</span><span class=SpellE>MLSystemManager</span> -L [<span
class=SpellE>LearningAlgorithm</span>] -A [<span class=SpellE>ARFF_File</span>]
-E cross [<span class=SpellE>NumOfFolds</span>] <o:p></o:p></span></p>

<p class=MsoNormal><o:p>&nbsp;</o:p></p>

<p class=MsoNormal><span style='font-size:12.0pt'>Here is an example of using
the C++ ML toolkit and the output:</span></p>

<p class=MsoNormal><o:p>&nbsp;</o:p></p>

<p class=MsoNormal style='margin-left:.5in'><span class=GramE><span
style='font-size:12.0pt'>./</span></span><span class=SpellE><span
style='font-size:12.0pt'>MLSystemManager</span></span><span style='font-size:
12.0pt'> -L dummy -A ../Research/<span class=SpellE>dataSets</span>/<span
class=SpellE>iris.arff</span> -E training -N<br>
Dataset name: iris<br>
Dataset is normalized.<br>
Number of instances: 150<br>
Learning algorithm: dummy<br>
Evaluation method: training<br>
<br>
Accuracy on the training set:<br>
Output classes accuracy: <br>
Iris-<span class=SpellE>setosa</span>: 1<br>
Iris-<span class=SpellE>versicolor</span>: 0<br>
Iris-<span class=SpellE>virginica</span>: 0<br>
Set accuracy: 0.333333<br>
<br>
Accuracy on the test set:<br>
Output classes accuracy: <br>
Iris-<span class=SpellE>setosa</span>: 1<br>
Iris-<span class=SpellE>versicolor</span>: 0<br>
Iris-<span class=SpellE>virginica</span>: 0<br>
Set accuracy: 0.333333<br>
<br>
Time to train: 5.96046e-06 seconds<span style="mso-spacerun:yes"> 
</span>(Note: If the simulation starts before midnight and ends after, the time
will not be accurate)</span></p>

<p class=MsoNormal><span style='font-size:12.0pt'><o:p>&nbsp;</o:p></span></p>

<p class=MsoNormal><span style='font-size:12.0pt'>A <span class=SpellE>DummyLearner</span>
class is provided that classifies all instances as the majority class (<span
class=SpellE>BaselineLearner</span>). This class can be used as a template for
creating your own learning algorithms. The instances are stored in a vector of
vectors of doubles (<span class=SpellE>C++</span> version) or an <span
class=SpellE>ArrayList</span> of <span class=SpellE>ArrayLists</span> of doubles
(Java version). For further implementation questions, please see the <span
class=SpellE>TAs.</span> When creating a new learning algorithm, you need to
add the include line in the <span class=SpellE>MLSystemManager</span> file and
it must inherit from the Learner class.<o:p></o:p></span></p>

<p class=MsoNormal><span style='font-size:12.0pt'><o:p>&nbsp;</o:p></span></p>

</div>

</body>

</html>