Okay, so you know what this is most of you. Just let me know where I fucked up. I'll edit this post with updates as they come. For those that don't know, this is straight C coding. I've chopped out the bits that aren't relevant yet, and this is as short as I can get it.
CODE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <assert.h>
#define MAXCHARS 1000
typedef char* data_t;
typedef struct node node_t;
struct node {
node_t* left;
node_t* right;
data_t dna;
};
typedef struct {
node_t* root;
} tree_t;
tree_t*
make_tree(void) {
tree_t *newtree;
assert((newtree = (tree_t*)malloc(sizeof(*newtree))) != NULL);
newtree->root = NULL;
return newtree;
}
node_t*
make_node(data_t value) {
node_t *newnode;
assert((newnode = (node_t*)malloc(sizeof(*newnode))) != NULL);
newnode->dna = value;
newnode->left = newnode->right = NULL;
return newnode;
}
node_t*
recursive_insert(node_t *root, data_t value) {
int comparison;
if (root == NULL) {
return make_node(value);
}
else {
comparison = compare(value, root->dna);
if (comparison < 0) {
root->left = recursive_insert(root->left, value);
}
else if (comparison > 0) {
root->right = recursive_insert(root->right, value);
}
return root;
}
}
tree_t*
insert_node(tree_t *tree, data_t value) {
tree->root = recursive_insert(tree->root, value);
}
int
compare (data_t v1, data_t v2) {
strcmp(v1, v2);
}
node_t*
recursive_search (node_t *root, data_t value) {
int comparison;
if (root == NULL) {
return NULL;
}
comparison = compare (value, root->dna);
if (comparison == 0) {
return root;
}
else if (comparison < 0) {
return recursive_search(root->left, value);
}
else {
return recursive_search(root->right, value);
}
}
node_t*
search_tree (tree_t *tree, data_t value) {
return recursive_search(tree->root, value);
}
void
recursive_print(node_t* root) {
if (root == NULL) {
return;
}
recursive_print(root->left);
printf("%s\n", root->dna);
recursive_print(root->right);
}
void
inorder_print(tree_t* tree) {
recursive_print(tree->root);
}
char
fget_nucleotide(FILE *inpf) {
int c;
while ((c=getc(inpf))!=EOF && !isalpha(c)) {
}
if (c==EOF) {
return EOF;
}
c = toupper(c);
if (c == 'G'){
return c;
}
else if (c == 'A') {
return c;
}
else if (c == 'T') {
return c;
}
else if (c == 'C') {
return c;
}
else
return NULL;
}
int
main(int argc, char **argv) {
char onenucleotide;
char onesequence[MAXCHARS];
char newsequence[MAXCHARS];
tree_t* allsequences;
int count = 0;
node_t* location;
char inputfilename[MAXCHARS+1];
FILE *inputf;
int limit = 0;
int j = 0;
int i;
printf("Welcome to the Nucleotide-o-matic. \n");
printf("Please enter the input file you wish to be DNAed:" );
scanf("%s", inputfilename);
inputf = fopen(inputfilename, "r");
if (inputf == NULL) {
printf("Mayday! Mayday! File %s cannot be opened! \n", inputfilename);
exit(EXIT_FAILURE);
}
for(i = 0; i < MAXCHARS; i++) {
onenucleotide = fget_nucleotide(inputf);
if (onenucleotide == EOF) {
onesequence[j] = '\0';
break;
}
if (onenucleotide != NULL) {
onesequence[j] = onenucleotide;
j += 1;
}
fseek(inputf, (i + 1) , SEEK_SET);
}
allsequences = make_tree();
limit = j;
j = 0;
for(i = 0; i < limit; i++) {
for(j = 0; j < limit; j++) {
newsequence[i] = onesequence [j];
location = search_tree(allsequences, newsequence);
if ( location == NULL) {
allsequences = insert_node(allsequences, newsequence);
}
}
}
inorder_print(allsequences);
return 0;
}