/******************************************************************************/
/* psx-dst.c                                  Distribution Handling (for smi) */
/******************************************************************************/
/** @file psx-dst.c Distribution Handling - Source Code File
 * Functions providing statistical means to support stochastic matching.
 * The main purpose of this module is handling the distribution and/or
 * relative distribution of every item of a record.
 */


#include "psx.h"

/******************************************************************************/
/*                                                                            */
/******************************************************************************/

/* for logging: */
#define MOD PSX_MOD_DST

/******************************************************************************/
/* Private                                                                    */
/******************************************************************************/

/** head of the singly linked list of all distributions (NULL = empty list) */
static PSX_DST * dst_list = NULL;


/******************************************************************************/
/** Allocate a PSX_DST structure; its contents are left uninitialized.
 * @param dst address of the pointer that receives the new structure
 *            (receives NULL when the allocation fails)
 * @return TRUE on success, FALSE if no memory could be obtained
 */
static BOOL dst_create (PSX_DST ** dst)
{
 PSX_DST * fresh;

 assert (dst != NULL);

 /* size is taken from the pointee, so it follows the type automatically */
 fresh = malloc (sizeof *fresh);
 *dst = fresh;

 return (fresh != NULL ? TRUE : FALSE);
}

/******************************************************************************/
/** Release a PSX_DST structure and clear the caller's pointer.
 * @param dst address of the pointer to the structure; *dst may be NULL
 * @return always TRUE
 */
static BOOL dst_delete (PSX_DST ** dst)
{
 assert (dst != NULL);

 /* free (NULL) is a no-op, so no separate NULL test is required */
 free (*dst);
 *dst = NULL;

 return (TRUE);
}
 
/******************************************************************************/
/** Insert a distribution structure at the head of the module's list.
 * @param dst structure to insert; its next pointer is overwritten
 */
static void dst_prepend (PSX_DST * dst)
{
 /* the former head becomes the successor of the new element ... */
 dst->next = dst_list;

 /* ... and the new element becomes the head of the list */
 dst_list = dst;
}

/******************************************************************************/
/** create relative distribution for attribute and insert in list
 * @param atr string pointer to attribute
 */
static BOOL dst_make_rel (const char * atr)
{
 PSX_DST * dst;
 
 if (!dst_create (&dst))
  return (FALSE);

 strcpy (dst -> atr,atr);
 dst -> dst = DST_REL;         /* insert into list */
 dst_prepend (dst);
 return (TRUE);
}

/******************************************************************************/
/** calculate distribution with n characteristics for attribute atr and insert
 *  them into list
 * @param atr string pointer to attribute
 * @param n number of characteristics for attribute atr
 */
 
static BOOL dst_make_uni (const char * atr,int n)
{
 PSX_DST * dst;
 
 if (!dst_create (&dst))
  return (FALSE);
 
 strcpy (dst -> atr,atr);
 dst -> dst = DST_UNI;
 dst -> n = n;
 dst_prepend (dst);
 return (TRUE);
}

/******************************************************************************/
/** read configuration and create respective distribution structures
 *       <<<< under construcion >>>>
 */
 
static BOOL dst_cfg_get ()
{
 /* at the moment this is a test-procedure, which does not read any configuration
 */
 dst_make_rel ("hio");
 dst_make_rel ("hic");
 dst_make_rel ("lname_C1");
 dst_make_rel ("lname_C2");
 dst_make_rel ("lname_C3");
 dst_make_rel ("lname_PC");
 dst_make_rel ("lname_PH");
 dst_make_rel ("aname_C1");
 dst_make_rel ("aname_C2");
 dst_make_rel ("aname_C3");
 dst_make_rel ("aname_PC");
 dst_make_rel ("aname_PH");
 dst_make_rel ("fname_C1");
 dst_make_rel ("fname_C2");
 dst_make_rel ("fname_C3");
 dst_make_rel ("fname_PC");
 dst_make_rel ("fname_PH");
 dst_make_uni ("bd", 31);
 dst_make_uni ("bm", 12);
 dst_make_rel ("by");
 dst_make_rel ("plz");
 dst_make_rel ("loc");
 dst_make_rel ("state");
 dst_make_uni ("sex", 2);
 return (TRUE);
}

/******************************************************************************/
/* Public                                                                     */
/******************************************************************************/
/******************************************************************************/
/** Initialize all distributions.
 * Resets the module's list to empty and then builds it via dst_cfg_get ().
 * @return the result of dst_cfg_get ()
 */
BOOL dst_init ()
{
 BOOL ok;

 dst_list = NULL;     /* start with an empty list   */
 ok = dst_cfg_get (); /* read the configuration     */

 return (ok);
}

/******************************************************************************/
/** Release every distribution in the list; the list is empty afterwards.
 * @return always TRUE
 */
BOOL dst_exit ()
{
 PSX_DST * rest;

 /* walk over all distributions, always releasing the current head */
 for (; dst_list != NULL; dst_list = rest)
 {
  rest = dst_list->next;  /* remember the successor before the head is freed */
  dst_delete (&dst_list); /* release the distribution */
 }

 return (TRUE);
}

/******************************************************************************/
/** Look up the distribution belonging to attribute atr.
 * @param atr attribute name to search for
 * @param res receives the pointer to the matching distribution, or NULL if
 *            there is none
 * @return TRUE if a distribution was found, FALSE otherwise
 */
BOOL dst_get (char * atr,PSX_DST ** res)
{
 PSX_DST * cur;

 assert (res != NULL);

 /* advance until the names match or the end of the list is reached */
 cur = dst_list;
 while (cur != NULL && strcmp (atr,cur->atr) != 0)
  cur = cur->next;

 /* cur is either the match or NULL */
 *res = cur;

 if (cur == NULL)
 {
  /* error: distribution not found */
  LOG (MOD,"Requested distribution not found!");
  return (FALSE);
 }

 return (TRUE);
}

/******************************************************************************/
/** Get the probability of a characteristic within distribution dst.
 * @param dst distribution structure to query
 * @param val value string: with keytype KEY_NO it is converted to an array
 *            index, with KEY_STR it is compared against the string keys and
 *            with KEY_INT it is converted to a number and compared against
 *            the numeric keys
 * @param p   receives the probability; set to 0.0 whenever FALSE is returned
 * @return TRUE if a probability could be determined, FALSE otherwise
 *         (unknown distribution or key type, key not found, index out of
 *         range, or a non-positive number of characteristics)
 */
BOOL dst_prob (PSX_DST * dst,char * val,double * p)
{
 int nval;
 int i;

 assert (dst != NULL && val != NULL && p != NULL);

 *p = 0.0; /* result on every failure path */

 switch (dst -> dst) /* distribution type */
 {
 case DST_UNI:
  /* evenly distributed probability; guard against division by zero */
  if (dst -> n <= 0)
  {
   LOG (MOD,"Invalid number of characteristics!");
   return (FALSE);
  }
  *p = 1.0 / dst -> n;
  return (TRUE);

 case DST_FIX:
  switch (dst -> keytype) /* key type */
  {
  case KEY_NO: /* no key, val is a direct index into the array */
   nval = atoi (val);
   /* NOTE(review): assumes p holds n entries indexed from 0, as the keyed
    * lookups below do - confirm */
   if (nval < 0 || nval >= dst -> n)
   {
    LOG (MOD,"Index out of range!");
    return (FALSE);
   }
   *p = dst -> p[nval];
   return (TRUE);

  case KEY_STR: /* key is a string */
   for (i = 0; i < dst -> n; i++)
   {
    if (!strcmp (dst -> keys.str[i],val))
    {
     /* found: return directly so the result is not overwritten below */
     *p = dst -> p[i];
     return (TRUE);
    }
   }
   LOG (MOD,"Key not found!");
   return (FALSE);

  case KEY_INT: /* key is an int value */
   nval = atoi (val); /* numeric key to search for */
   for (i = 0; i < dst -> n; i++)
   {
    if (dst -> keys.num[i] == nval)
    {
     /* found: return directly so the result is not overwritten below */
     *p = dst -> p[i];
     return (TRUE);
    }
   }
   LOG (MOD,"Key not found!");
   return (FALSE);

  default: /* unknown key type */
   LOG (MOD,"Unknown key type!");
   return (FALSE);
  }

 default: /* unknown distribution */
  LOG (MOD, "Unknown distribution type!");
  return (FALSE);
 }
}

/******************************************************************************/
/******************************************************************************/
/******************************************************************************/


