0) { $query = $query."classification='".mysql_result($classes, $i, 0)."' group by classification order by instances desc"; $classes = mysql_query($query); } } $tree[$root] = mysql_result($classes, 0, 0); } else if (mysql_numrows($classes) == 0) { // else if there are no instances in the data set, no class can be determined $tree[$root] = "*NO DATA*"; } else { // calculate the information gain for each attribute A, according to the average entropy formula $attr = explode(" ",$attr_left); for ($i=0; $i < sizeof($attr); $i++) $gain[$i] = information_gain($attr[$i], $inst_left); // select the attribute with the lowest average entropy (highest information gain) and make this the // attribute tested at the root $tested = 0; for ($i=1; $i < sizeof($attr); $i++) if ($gain[$i] > $gain[$tested]) $tested = $i; // this code was used for question 4 to choose an attribute randomly: // $tested = rand(0,sizeof($attr)-1); $attr = $attr[$tested]; $tree[$root] = $attr; $values = mysql_query("select $attr from training $inst_left group by $attr order by $attr"); for ($i=0; $i < mysql_numrows($values); $i++) { // for each possible value V of this attribute // add a new branch below the root, corresponding to A = V $value = mysql_result($values, $i, 0); $branch = $root."|".$attr."-".$value; // let examples(V) be those examples with A = V $query = "select classification, count(classification) as instances from training $inst_left and $attr='$value' "; $query = $query."group by classification order by instances desc"; $examples = mysql_query($query); if (mysql_numrows($examples) == 0) // if examples(V) is empty, make the new branch a leaf node labelled with the most // common value among examples $tree[$branch] = mysql_result($classes, 0, 0); else { // else let the new branch be the tree created by recursively calling the id3 function $new_attr_left = trim(ereg_replace("( )+($attr)( )+"," "," ".$attr_left." ")); $new_inst_left = $inst_left." and $attr='$value'"; $tree = train_id3_dtree($tree, $branch, $num_attr, $new_attr_left, $new_inst_left, $max_val); } } } return $tree; } function id3_dtree_trace($tree, $root, $level) { /* this function returns a trace of a dtree generated by the id3 algorithm starting from the node $root it parses the tree in a depth-first, inorder fashion, and labels the first node as level $level */ $attr = $tree[$root]; $trace = $trace."LEVEL$level: $attr"; if (mysql_numrows(mysql_query("select id from attributes where id='$attr'")) > 0) { // if root node is an attribute $trace = $trace."\n"; $values = mysql_query("select $attr from training group by $attr order by $attr"); for ($i=0; $i < mysql_numrows($values); $i++) { // parse subtrees $value = mysql_result($values, $i, 0); if (isset($tree[$root."|".$attr."-".$value])) $trace = $trace."BRANCH: $value\n".id3_dtree_trace($tree,$root."|".$attr."-".$value, $level+1); } } else { $query = "select count(instance) from training where classification='$attr'"; $root = explode("|", $root); for ($i=1; $i < sizeof($root); $i++) { $attr = explode("-",$root[$i]); $query = $query." and ".$attr[0]."='".$attr[1]."'"; } $trace = $trace." (".mysql_result(mysql_query($query), 0, 0).")\n"; } return $trace; } function id3_dtree_classify($tree, $root, $set, $instance) { /* this function returns the classification made by the dtree $tree for the element of the data set $set labeled by $instance */ $attr = $tree[$root]; $value = mysql_query("select $attr from $set where instance='$instance'"); if (mysql_error()) // if the attr is not a valid attribute, it must be a classification return $attr; $value = mysql_result($value, 0, 0); if (isset($tree[$root."|".$attr."-".$value])) // if the tree knows how to classify the attribute, continue parsing return id3_dtree_classify($tree, $root."|".$attr."-".$value, $set, $instance); // if the tree doesn't branch on the attribute value, return the most common classification at that node $root = explode("|", $root); $query = "select classification, count(classification) as instances from training where instance like '%'"; for ($i=1; $i < sizeof($root); $i++) { $attr = explode("-",$root[$i]); $query = $query." and ".$attr[0]."='".$attr[1]."' "; } $query = $query."group by classification order by instances desc"; return mysql_result(mysql_query($query), 0, 0); } ?>