Question:
To compute the conditional probabilities you need to determine unigram and
bigram counts first (you can do this in a single pass through a file if you do things
carefully) and store them in a Binary Search Tree (BST). After that, you can compute
the conditional probabilities.
Input files
Test files can be found on (http://www.gutenberg.org/ebooks/). For example,
search for “Mark Twain.” Then click on any of his books. Next download the “Plain
Text UTF-8” format.
In addition, you should test your program on other input files as well, for which you
can hand-compute the correct answer.
Output files
Your program must accept the name of an input file as a command line argument.
Let's call the file name of this file fn. Your program must then produce as output the
following set of files:
• Your program must write the unigram counts to a file named fn.uni in which
each unigram is listed on a separate line, and each line contains just the
unigram and its count (an integer), separated by a single space.
• Your program must write the bigram counts to a file named fn.bi in which
each bigram is listed on a separate line, and each line contains just the
bigram and its count (an integer), separated by a single space.
• Your program must write the conditional probabilities to a file named fn.cp,
reported in the form P(WORD(k)|WORD(k-1)) = p, where p is the conditional
probability of WORD(k) given WORD(k-1).
Notes
• You may use any BST implementation found online at your own risk
(provided the source of this code is cited properly). 50 marks bonus would
be given if you implemented the BST yourself (only the functionalities
needed to complete your project).
• Your program should accept file name(s) as command line argument(s) (no
hard-coded file names in your code).
• Your code will be tested using the latest version of the GNU C compiler on a
Unix-based operating system. If you do not have a Unix-based machine you
may want to use Windows Subsystem or a Virtual Box under your own
responsibility!
• Your code must be well commented. When writing your comments, you
should focus on what the code does at a high level; for example, describe the
main steps of an algorithm—not every detail (that what the code is for).
• A ReadMe.txt file (including instructions on how to compile and how to run
your program along with any known problems) must be submitted

Answer:
Step 1
If root == NULL
return NULL;
If number == root->data
return root->data;
If number < root->data
return search(root->left)
If number > root->data
return search(root->right)
Step 2
#include <stdio.h>
#include <stdlib.h>
struct btnode
{
int value;
struct btnode *l;
struct btnode *r;
}*root = NULL, *temp = NULL, *t2, *t1;
void delete1();
void insert();
void delete();
void inorder(struct btnode *t);
void create();
void search(struct btnode *t);
void preorder(struct btnode *t);
void postorder(struct btnode *t);
void search1(struct btnode *t,int data);
int smallest(struct btnode *t);
int largest(struct btnode *t);
int flag = 1;
void main()
{
int ch;
printf("\nOPERATIONS ---");
printf("\n1 - Insert an element into tree\n");
printf("2 - Delete an element from the tree\n");
printf("3 - Inorder Traversal\n");
printf("4 - Preorder Traversal\n");
printf("5 - Postorder Traversal\n");
printf("6 - Exit\n");
while(1)
{
printf("\nEnter your choice : ");
scanf("%d", &ch);
switch (ch)
{
case 1:
insert();
break;
case 2:
delete();
break;
case 3:
inorder(root);
break;
case 4:
preorder(root);
break;
case 5:
postorder(root);
break;
case 6:
exit(0);
default :
printf("Wrong choice, Please enter correct choice ");
break;
}
}
}
void insert()
{
create();
if (root == NULL)
root = temp;
else
search(root);
}
void create()
{
int data;
printf("Enter data of node to be inserted : ");
scanf("%d", &data);
temp = (struct btnode *)malloc(1*sizeof(struct btnode));
temp->value = data;
temp->l = temp->r = NULL;
}
void search(struct btnode *t)
{
if ((temp->value > t->value) && (t->r != NULL))
search(t->r);
else if ((temp->value > t->value) && (t->r == NULL))
t->r = temp;
else if ((temp->value < t->value) && (t->l != NULL))
search(t->l);
else if ((temp->value < t->value) && (t->l == NULL))
t->l = temp;
}
void inorder(struct btnode *t)
{
if (root == NULL)
{
printf("No elements in a tree to display");
return;
}
if (t->l != NULL)
inorder(t->l);
printf("%d -> ", t->value);
if (t->r != NULL)
inorder(t->r);
}
void delete()
{
int data;
if (root == NULL)
{
printf("No elements in a tree to delete");
return;
}
printf("Enter the data to be deleted : ");
scanf("%d", &data);
t1 = root;
t2 = root;
search1(root, data);
}
void preorder(struct btnode *t)
{
if (root == NULL)
{
printf("No elements in a tree to display");
return;
}
printf("%d -> ", t->value);
if (t->l != NULL)
preorder(t->l);
if (t->r != NULL)
preorder(t->r);
}
void postorder(struct btnode *t)
{
if (root == NULL)
{
printf("No elements in a tree to display ");
return;
}
if (t->l != NULL)
postorder(t->l);
if (t->r != NULL)
postorder(t->r);
printf("%d -> ", t->value);
}
void search1(struct btnode *t, int data)
{
if ((data>t->value))
{
t1 = t;
search1(t->r, data);
}
else if ((data < t->value))
{
t1 = t;
search1(t->l, data);
}
else if ((data==t->value))
{
delete1(t);
}
}
void delete1(struct btnode *t)
{
int k;
if ((t->l == NULL) && (t->r == NULL))
{
if (t1->l == t)
{
t1->l = NULL;
}
else
{
t1->r = NULL;
}
t = NULL;
free(t);
return;
}
else if ((t->r == NULL))
{
if (t1 == t)
{
root = t->l;
t1 = root;
}
else if (t1->l == t)
{
t1->l = t->l;
}
else
{
t1->r = t->l;
}
t = NULL;
free(t);
return;
}
else if (t->l == NULL)
{
if (t1 == t)
{
root = t->r;
t1 = root;
}
else if (t1->r == t)
t1->r = t->r;
else
t1->l = t->r;
t == NULL;
free(t);
return;
}
else if ((t->l != NULL) && (t->r != NULL))
{
t2 = root;
if (t->r != NULL)
{
k = smallest(t->r);
flag = 1;
}
else
{
k =largest(t->l);
flag = 2;
}
search1(root, k);
t->value = k;
}
}
int smallest(struct btnode *t)
{
t2 = t;
if (t->l != NULL)
{
t2 = t;
return(smallest(t->l));
}
else
return (t->value);
}
int largest(struct btnode *t)
{
if (t->r != NULL)
{
t2 = t;
return(largest(t->r));
}
else
return(t->value);
}