/* #module    IdxParse    "3-001"
 ***********************************************************************
 *                                                                     *
 * The software was developed at the Monsanto Company and is provided  *
 * "as-is".  Monsanto Company and the author disclaim all warranties   *
 * on the software, including without limitation, all implied warran-  *
 * ties of merchantability and fitness.                                *
 *                                                                     *
 * This software does not contain any technical data or information    *
 * that is proprietary in nature.  It may be copied, modified, and     *
 * distributed on a non-profit basis and with the inclusion of this    *
 * notice.                                                             *
 *                                                                     *
 ***********************************************************************
 */

/*
 * Module Name:	IdxParse
 *
 * Author:	R L Aurbach	CR&DS MIS Group    26-Apr-1986
 *
 * Function:
 *	Parse an input line produced by LaTeX \index command into a form usable
 *	for the generation of an automatic index.
 *
 * Modification History:
 *
 * Version     Initials	   Date		Description
 * ------------------------------------------------------------------------
 * 1-001	RLA	26-Apr-1986	Original Code
 * 2-002	RLA	10-Apr-1987	Add support for the Idx_Extract routine.
 * 2-003	RLA	15-Apr-1987	Add support for cross-referencing
 * 2-004	RLA	20-Apr-1987	Change the cross reference symbol to '&'
 * 3-001	F.H.	17-May-1991	converted to portable C
 */
/*
 * Module IdxParse - Module-Wide Data Description Section
 *
 * Include Files:
 */
/* MSDOS builds take the standard declarations from <stdlib.h> and <io.h>
 * and supply their own F_OK; all other builds include <sys/file.h>. */
#ifdef MSDOS
#include <stdlib.h>
#include <io.h>
#define F_OK		0	/* access(): File exists */
#else
#include <sys/file.h>
/* NOTE(review): this pre-ANSI declaration gives sprintf a char * return
 * type.  An ISO C <stdio.h> declares it as returning int, so this line
 * presumably conflicts on modern compilers -- confirm against the target
 * toolchains.  The only call in this module discards the result. */
extern char *sprintf();
#endif
#include <stdio.h>
#include <string.h>
#include "IdxDef.h"
/*
 * Module Definitions:
 */
/* Boolean values used for the math-mode state and the cross-reference flag. */
#define   TRUE	1
#define   FALSE	0
/*
 * Global Declarations:
 */
/*
 * Static Declarations:
 */
/* Forward declarations of the two routines defined in this module.
 * MSDOS builds get full prototypes; other builds get old-style
 * declarations without parameter information. */
#ifdef MSDOS
void idx_parse(char *linebf, char *token_1, char *token_2,
	char *token_3, char *page_no, int *token_ct, int *flag);
int idx_extract(char *string, int *start, int *length);
#else
void idx_parse();
int idx_extract();
#endif
/*
 * External References:
 */
/*
 * Functions Called:
 */
/*
 * Function Idx_Parse - Documentation Section
 *
 * Discussion:
 *	Parse an input line which is of the general form
 *	    \indexentry{token_1>token_2>token_3}{page_no}
 *	into tokens for "token_1", "token_2", "token_3", and "page_no".
 *	Handle missing tokens and allow for imbedded LaTeX commands.
 *
 * Calling Synopsis:
 *	Call Idx_Parse (linebf, token_1, token_2, token_3, page_no, token_ct, 
 *									flag)
 *
 * Inputs:
 *	linebf	    ->	is the input string containing the line to parse.  
 *			ASCIZ string passed by reference.
 *
 * Outputs:
 *	token_1	    ->	is the first token seen (if any).
 *			ASCIZ string passed by reference.
 *
 *	token_2	    ->	is the second token seen (if any).
 *			ASCIZ string passed by reference.
 *
 *	token_3	    ->	is the third token seen (if any).
 *			ASCIZ string passed by reference.
 *
 *	page_no	    ->	is the page number token (should always be present).
 *			ASCIZ string passed by reference.
 *
 *	token_ct    ->	is the number of tokens seen.
 *			Integer passed by reference.
 *
 *	flag	    ->	indicates if the page reference is a cross-reference.
 *			Boolean passed by reference.
 *
 * Return Value:
 *	none
 *
 * Global Data:
 *	none
 *
 * Files Used:
 *	none
 *
 * Assumed Entry State:
 *	none
 *
 * Normal Exit State:
 *	returns with the record processed.
 *
 * Error Conditions:
 *	none
 *
 * Algorithm:
 *	A. Initialize variables.
 *	B. Verify that the line begins with \indexentry.
 *	C. Extract the index item substring.
 *	D. Parse it into tokens.
 *	E. Extract the page reference substring.
 *
 * Special Notes:
 *	This logic is designed to:
 *	    * copy sequences such as \{, \}, and $>$ from the input stream
 *	      into tokens without recognizing them as token delimiters.
 *	    * allow sequences such as {\bf text} in token without recognizing
 *	      the special characters as delimiters.
 *	    * allow up to three levels of index and one level of page-no,
 *	      ignoring all other characters (which shouldn't exist...)
 */
/*
 * Function Idx_Parse - Code Section
 */
void idx_parse(linebf,token_1,token_2,token_3,page_no,token_ct,flag)
     char *linebf;	    /* Input string to be parsed	*/
     char *token_1;	    /* First token string		*/
     char *token_2;	    /* Second token string		*/
     char *token_3;	    /* Third token string		*/
     char *page_no;	    /* Page number token string		*/
     int *token_ct;	    /* Number of item tokens seen	*/
     int *flag;		    /* Cross-Reference flag		*/
{
/*
 * Split a line of the form \indexentry{t1>t2>t3}{page} into its tokens.
 *
 * On return every output string is NUL-terminated.  If the line does not
 * start with "\indexentry{" or the index item is empty, all outputs are
 * empty, *token_ct is 0 and *flag is FALSE.
 *
 * An unescaped '&' inside the index item marks a cross-reference: the
 * text after the '&' (up to the end of the index item) is wrapped as
 * "\indexentry{<text>}{}" and stored in page_no, *flag is set to TRUE,
 * and the page-reference group of the input line is not examined.
 *
 * The caller owns all buffers; this routine has no way to check their
 * sizes, so they must be large enough for the line being parsed.
 */
/*
 * Local Declarations
 */
  int	i;		    /* linebf counter			*/
  int	next;		    /* beginning of next substring	*/
  int	len;		    /* length of substring		*/
  int	math_mode = FALSE;  /* math-mode flag			*/
  char	*ptr;		    /* pointer to token buffer		*/
  int	out;		    /* index into token buffer		*/
  int	end;		    /* end of substring			*/
  int	xref_len;	    /* length of cross-reference text	*/
/*
 * Module Body
 */
/* Initialize assorted variables, flags, etc. */
  token_1[0] = '\0';
  token_2[0] = '\0';
  token_3[0] = '\0';
  page_no[0] = '\0';
  *token_ct  = 0;
  *flag	   = FALSE;
  ptr = token_1;
  out = 0;
/* Verify that the string begins with \indexentry */
  if (strncmp(linebf, "\\indexentry{", 12) != 0) return;
/* Locate the first substring.  Index 11 is the '{' matched just above. */
  i = 11;
  next = idx_extract(linebf, &i, &len);
/* A non-positive length means there is no index item to parse. */
  if (len <= 0)	return;
  *token_ct = 1;
  end = i + len;
/*
 * For all characters in the substring, parse out the tokens.  i starts at
 * 12 or later, so looking back at linebf[i-1] is always inside the line.
 */
  for ( ; i < end; i++) {
    switch (linebf[i]) {
    case '$' :
      /* An unescaped '$' toggles math mode; the '$' itself is always kept. */
      if (linebf[i-1] != '\\') {
	if (math_mode)  math_mode = FALSE;
	else		math_mode = TRUE;
      }
      ptr[out++] = linebf[i];
      break;
    case '>' :
      /* Inside math mode '>' is ordinary text, not a level separator. */
      if (math_mode)  ptr[out++] = linebf[i];
      else {
	/* A separator with nothing before it is ignored (no empty tokens). */
	if (out == 0)	break;
	ptr[out] = '\0';
	out = 0;
	/*
	 * NOTE(review): once three tokens exist ptr is left on token_3, so
	 * a fourth level overwrites the third.  Kept as in the original;
	 * confirm whether extra levels should be dropped instead.
	 */
	switch (*token_ct) {
	case 1 :
	  ptr = token_2;  *token_ct = 2; break;
	case 2 :
	  ptr = token_3;  *token_ct = 3; break;
	}
      }
      break;
    case '&' :
      /* "\&" is a literal ampersand, not the cross-reference marker. */
      if (linebf[i-1] == '\\') {
	ptr[out++] = linebf[i];
	break;
      }
      ptr[out] = '\0';
      /*
       * The cross-reference text is everything after the '&' up to, but
       * not including, position end.  Count from i+1 so that the closing
       * '}' of the index item is not copied into the generated entry.
       */
      xref_len = end - (i + 1);
      (void)sprintf(page_no, "\\indexentry{%.*s}{}",
		    xref_len, &linebf[i + 1]);
      *flag = TRUE;
      return;
    default :
      ptr[out++] = linebf[i]; break;
    }
  }
  ptr[out] = '\0';
/* Now parse the second substring and copy it to the page-ref. */
  i = next;
  (void)idx_extract(linebf, &i, &len);
/* Never hand a negative count to strncpy; page_no is already empty. */
  if (len > 0) {
    (void)strncpy (page_no, &linebf[i], len);
    page_no[len] = '\0';
  }
}

/*
 * Function Idx_Extract - Documentation Section
 *
 * Discussion:
 *	Extract a string contained within a {} pair, which may contain imbedded
 *	LaTeX commands.
 *
 * Calling Synopsis:
 *	next = Idx_Extract (string, start, length)
 *
 * Inputs:
 *	string	    ->	is an ASCIZ string containing a substring enclosed in
 *			matching braces ({}).
 *
 *	start	    ->	is the string index of the first character of the
 *			string to be searched.  It is passed by reference.
 *
 * Outputs:
 *	start	    ->	is the string index of the first character of the
 *			substring, passed by reference.
 *
 *	length	    ->	is the number of characters in the substring, passed by
 *			reference.
 *
 * Return Value:
 *	next	    ->	is the string index of the first character after the
 *			'}' which terminates the substring.  If the substring
 *			is not terminated by '}', next is the string index of
 *			the '\0' character.
 *
 * Global Data:
 *	none
 *
 * Files Used:
 *	none
 *
 * Assumed Entry State:
 *	none
 *
 * Normal Exit State:
 *	none
 *
 * Error Conditions:
 *	none
 *
 * Algorithm:
 *	A. Scan the string for the first '{'.
 *	B. For all succeeding characters in the string,
 *	    1. If the character is '{' (not preceded by '\\'),
 *		a. Increment the brace count.
 *	    2. If the character is '}' (not preceded by '\\'),
 *		a. Decrement the brace count.
 *		b. If the brace count = 0,
 *		    1. Return the start and length of the substring.
 *
 * Special Notes:
 *	none
 */
/*
 * Function Idx_Extract - Code Section
 */
int idx_extract (string, start, length)
     char   *string;
     int    *start;
     int    *length;
{
/*
 * Locate the next brace-delimited substring of string at or after *start.
 *
 * On return *start is the index of the first character inside the braces
 * and *length is the number of characters between the opening '{' and its
 * matching '}'.  Nested unescaped braces are counted; braces preceded by a
 * backslash are ignored.  The return value is the index just past the
 * matching '}'.
 *
 * If there is no opening '{', *start and the return value are the index of
 * the terminating '\0' and *length is 0.  If the opening '{' is never
 * closed, the end of the string is treated as the closing brace.
 */
/*
 * Local Declarations
 */
  int	    i;		    /* string index */
  int	    brace_ct = 0;   /* brace count  */
/*
 * Module Body
 */
/*
 * Scan the string for the first occurrence of a '{' character which is not
 * preceded by a '\\'.  The next character marks the beginning of the
 * substring to be extracted.
 */
  for (i = *start; string[i] != '\0'; i++) {
    if (string[i] != '{') continue;
    if (i > 0 && string[i-1] == '\\') continue;
    *start = i + 1;
    brace_ct++;
    break;
  }
/*
 * If brace_ct == 0, no leading { was found.  This is an error.  The scan
 * stopped on the terminating '\0', so i is the end-of-string index; return
 * start = next = end-of-string and length = 0.  (sizeof(string) would only
 * give the size of the pointer, not the length of the text.)
 */
  if (brace_ct == 0) {
    *start = i;
    *length = 0;
    return (i);
  }
/*
 * Now search the string for a matching }.  *start is at least 1 here, so
 * string[i-1] is always a valid character.
 */
  for (i = *start; string[i] != '\0'; i++) {
    if ((string[i] == '{') && (string[i-1] != '\\')) brace_ct++;
    if ((string[i] == '}') && (string[i-1] != '\\')) {
      brace_ct--;
      if (brace_ct == 0) {
	*length = i - *start;
	return (i + 1);
      }
    }
  }
/*
 * If we get here, no closing } was found.  Treat the end of string as a
 * matching }; i is the index of the terminating '\0'.
 */
  *length = i - *start;
  return (i);
}
