/* psbblex.l
 *
 * Lexical analyser for extraction of bounding box properties from [E]PS,
 * or PDF files, in response to groff's .psbb request.
 *
 * Written by Keith Marshall <keith@users.osdn.me>
 * Copyright (C) 2017, Free Software Foundation, Inc.
 *
 * This file is part of groff.
 *
 * groff is free software; you can redistribute it and/or modify it under
 * the terms of the GNU General Public License as published by the Free
 * Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * groff is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
%{
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <errno.h>
#include <math.h>

#include "psbb.h"
#include "psbb.tab.h"

#if DEBUGGING
# define DEBUG(FOO)  FOO
# define DEBUG_ECHO  debug_msg( "%d: %s\n", YYSTATE, yytext )

# define DEBUG_MSG(ARGLIST)  do { debug_msg ARGLIST; } while(0)

# define DEBUG_RETURN(TOKEN, NAME)					\
    do { debug_msg("%d: return token %s (%d)\n", YYSTATE, NAME, TOKEN);	\
	 return TOKEN;							\
       } while(0)

/* Diagnostic helper, compiled only when DEBUGGING is enabled: formats
 * its printf-style arguments, and writes the result to stderr.
 */
static void debug_msg (const char *fmt, ...)
{
  va_list args;

  va_start(args, fmt);
  vfprintf(stderr, fmt, args);
  va_end(args);
}

#else
# define DEBUG(FOO)
# define DEBUG_ECHO
# define DEBUG_MSG(ARGLIST)
# define DEBUG_RETURN(TOKEN, NAME)  return TOKEN
#endif

#define RETURN(TOKEN)  DEBUG_RETURN(TOKEN, #TOKEN)

/* The PDF object reference currently being resolved: ref[0] holds the
 * object index, and ref[1] the generation number, as recorded by
 * psbb_locate().
 */
static int ref[2] = { 0, 0 };

/* File offsets used while searching the PDF cross reference index:
 * xrefbase is captured from the "startxref" record, and xrefptr is
 * the working offset, advanced as the index is scanned.
 */
static size_t xrefbase;
static size_t xrefptr;

/* Progress of the scan through the current input file. */
enum { PSBB_PHASE_INIT = 0, PSBB_IN_HEADER, PSBB_IN_TRAILER };

static int psbb_phase;
static int psbb_parse_status;

static int psbb_crescendo_seek( void );

/* Push start condition STATE on to the lexer's start condition stack,
 * then return TOKEN to the parser; NAME is the printable token name,
 * which DEBUG_RETURN reports when DEBUGGING is enabled.
 */
#define DEBUG_PDFINVOKE(STATE, TOKEN, NAME)				\
   do { yy_push_state( STATE ); DEBUG_RETURN(TOKEN, NAME);		\
      } while(0)

/* Convenience form of DEBUG_PDFINVOKE, which derives the printable
 * token name from the TOKEN argument itself.
 */
#define PDFINVOKE(STATE, TOKEN)  DEBUG_PDFINVOKE(STATE, TOKEN, #TOKEN)

/* As PDFINVOKE, but effective only when the start condition on top of
 * the stack is FROM.  NOTE: this expands to an unbraced "if", so it
 * must not be followed directly by an "else" belonging to the caller.
 */
#define PDFINVOKE_IF(FROM, STATE, TOKEN)				\
   if( yy_top_state() == FROM )  DEBUG_PDFINVOKE(STATE, TOKEN, #TOKEN)

/* Record a parse failure, by setting psbb_parse_status to one; the
 * expression itself always evaluates to zero, so that it may be used
 * directly as the value returned from the lexer.
 */
#define PSBB_PARSE_FAILURE  ((psbb_parse_status = 1) & 0)
%}

%option stack noyywrap prefix = "psbb_"

%x SKIP UNKNOWN
%x PSHDR PSBB PSTRAILER PSATEND PSVOID
%x PDFINIT PDFTRAILER PDFDICT PDFSTARTXREF PDFXREF
%x PDFGETREF PDFGOXREF PDFGETOBJECT PDFSCANOBJECT PDFREFER
%x PDFEVAL PDFDUMP PDFIGNORE PDFOBJTYPE PDFKIDS PDFALLKIDS
%x PDFXREFCONT PDFXREFPREV PDFXREFWALK
%x PDFMEDIABOX PDFMEDIABOXEVAL

INTVAL   ([0-9]+)
FLOATVAL ([0-9]+"."[0-9]*)|("."[0-9]+)
SEP	 ([\000\t\f\r\n\040])

LINEDATA ([^\r\n]*)
EOL	 (\r?\n)|\r

READLN	 {LINEDATA}{EOL}

PDFNAME  ([^][(){}/%<>\000\t\f\r\n\040]+)
PDFSEP	 ([][(){}/%<>\000\t\f\r\n\040])

%{
static const char *psbb_input_file;

static int pdfseek( ssize_t offset )
{ /* Awkwardly, PDF files are not organized sequentially, and must be
   * scanned in (effectively) random block order.  This helper function
   * prepares the lexer to resume scanning at an arbitrary location.
   *
   * Any text which the lexer has already buffered is discarded, then
   * the input stream is repositioned to "offset" bytes from the start
   * of the file; the return value is that of fseek(), (zero on success,
   * non-zero otherwise).
   */
  yy_flush_buffer( YY_CURRENT_BUFFER );

  /* The offset is reported as a long, to match its "%ld" conversion
   * specification; (passing an ssize_t for "%d" is undefined).
   */
  DEBUG_MSG(("%d: pdfseek to offset = %ld\n", YYSTATE, (long)offset));
  return fseek( yyin, (long)offset, SEEK_SET );
}

static int pdf_trailer( void )
{ /* A local helper function, invoked from the INITIAL state rule when
   * the lexer input has been identified as a PDF file; it attempts to
   * locate the PDF file trailer, and reset the input context to its
   * starting offset.
   *
   * The result of the search is stored in psbb_parse_status, and also
   * returned; it is EOF when no trailer could be found, in which case
   * a diagnostic is issued.
   */
  psbb_parse_status = psbb_crescendo_seek();

  if( psbb_parse_status == EOF )
    yyerror( "PDF file '%s' is malformed; no trailer found", psbb_input_file );

  return psbb_parse_status;
}
%}
%%
 /* Pattern rules section: this defines the behaviour of yylex().  The
  * initial code block will be placed at the start of yylex() itself; it
  * provides a hook whereby the lexer may be forced back to the INITIAL
  * state, for each new input file to be scanned in sequence.
  */
%{			if( psbb_phase == PSBB_PHASE_INIT )
			{ psbb_phase = PSBB_IN_HEADER; BEGIN INITIAL;
			}
%}
 /* Unqualified patterns apply in start condition INITIAL only; we use
  * this to identify either PostScript or PDF input, or we bail out.
  */
.|\n			{ yymore(); BEGIN UNKNOWN; }
"%PDF-" 		{ BEGIN PDFINIT; if (pdf_trailer() == EOF) return 0; }
"%!PS-Adobe-"		{ BEGIN PSHDR; yy_push_state( SKIP ); }


 /* State: INITIAL
  *
  * We should have switched out of the INITIAL condition, as soon as any
  * input stream content has been scanned; if we reach EOF while still in
  * this condition, we were given a zero-length stream.
  */
<INITIAL><<EOF>>	{ yyerror( "file '%s' is empty", psbb_input_file );
			  return PSBB_PARSE_FAILURE;
			}

 /* State: SKIP
  *
  * We use the SKIP condition to swallow all input, after an initially
  * matched pattern, up to end of line, before resuming in a specified
  * condition for examination of the next line; (the obvious `.*\n' is
  * not sufficient here, since we need to be prepared to handle any of
  * the CR only, LF only, or CRLF line ending conventions).
  */
<SKIP>{READLN}		{ yy_pop_state(); }


 /* State: PSHDR
  *
  * Scanning state used exclusively while reading the header comments
  * within a PostScript file; any `%X', where `X' is any non-whitespace
  * character, is a valid comment, but the header must terminate at any
  * `%%EndComments' input, or any input line which does not match the
  * `%X' start-of-line requirement.
  */
<PSHDR>{
"%"[^ \t]		{ yy_push_state( SKIP ); }
"%%EndComments" 	{ BEGIN PSVOID; }
}

 /* States: PSHDR and PSTRAILER
  *
  * In the case of PostScript input files, our objective is to identify
  * a `%%BoundingBox:' specification within header or trailer comments,
  * and to interpret its bounding box arguments.  This start condition
  * is made active when scanning these file sections; it identifies the
  * requisite specification, then initiates the PSBB scanning state, to
  * interpret the arguments.
  */
<PSHDR,PSTRAILER>{
"%%BoundingBox:"	{ BEGIN PSBB; }
}

 /* States: PSHDR and PSVOID
  *
  * Scanning states provided as a shared resource, to facilitate the
  * diagnosis of a missing %%BoundingBox specification, when scanning
  * in either of the PSHDR or PSTRAILER contexts.
  */
<PSHDR,PSVOID>{
<<EOF>> 		|
.			{ yyerror( "no '%s' specification found in file '%s'",
			      "%%BoundingBox", psbb_input_file
			    );
			  return PSBB_PARSE_FAILURE;
			}
}

 /* State: PSBB
  *
  * Scanning state used exclusively to interpret the arguments to a
  * `%%BoundingBox:' comment, in either the PostScript file header, or
  * the trailer; we expect four space-separated numeric values, or (in
  * the header only) "(atend)".  In the former case, we return each
  * value separately; in the latter, we redirect the search to the
  * file trailer, where we hope to find four values.
  */
<PSBB>{
[ \t]+
[-+]?{INTVAL}/{SEP}	{ /* Bounding box coordinates may legitimately be
			   * negative, so an optional sign is accepted as
			   * part of the value; atol() interprets it.
			   */
			  yylval = atol( yytext ); RETURN(VALUE);
			}
[-+]?{FLOATVAL}/{SEP}	{ /* Non-integer values are rounded to the nearest
			   * integer; again, an optional sign is accepted.
			   */
			  yylval = lround( atof( yytext )); RETURN(VALUE);
			}
"(atend)"		{ if( psbb_phase == PSBB_IN_HEADER )
			  {
			    /* In header comments, `%%BoundingBox: (atend)'
			     * indicates that the real specification for the
			     * bounding box will be found in the file trailer;
			     * we use a crescendo seek, from the end of the
			     * input file, with recursive invocation of the
			     * lexer itself, to locate this.
			     */
			    BEGIN PSATEND; psbb_crescendo_seek();
			  }
			  else
			  { /* We've already been redirected to the trailer,
			     * and found `%%BoundingBox: (atend)' again.
			     */
			    yyerror( "'%s' is not allowed in trailer of '%s'",
				yytext, psbb_input_file
			      );
			    return PSBB_PARSE_FAILURE;
			  }
			}
[^0-9 \t\r\n]+		{ yyerror( "psbb: %s", yytext ); }
{EOL}			{ return 0; }
}

 /* State: PSATEND
  *
  * This start condition is used exclusively within recursive invocations
  * of the lexer, initiated from the PSBB start condition, while performing
  * the crescendo seek for the PostScript file trailer.  Return is always
  * to the calling lexer instance, with non-zero placing the caller in the
  * appropriate condition for interpretation of the trailer.
  */
<PSATEND>{
"%%Trailer"		{ psbb_phase = PSBB_IN_TRAILER;
			  BEGIN PSTRAILER; return 1;
			}
.|\n
}

 /* State: PSTRAILER
  *
  * Scanning state used exclusively when scanning the PostScript file
  * trailer, after redirection by `%%BoundingBox: (atend)' in the header;
  * it looks for a further explicit bounding box specification within the
  * trailer, further redirecting to PSVOID if none is present.  (Notice
  * that there is no `%%BoundingBox:' pattern here; that is specified
  * above, in a start condition scope shared with PSHDR).
  */
<PSTRAILER>{
.			{ BEGIN PSTRAILER; yy_push_state( SKIP ); }
<<EOF>> 		{ BEGIN PSVOID; }
\n
}

 /* State: PDFINIT
  *
  * Scanning state used exclusively during crescendo_seek() on a PDF
  * file, to locate the trailer section whence the starting offset for
  * the primary cross reference index may be obtained.
  */
<PDFINIT>{
"trailer"/{PDFSEP}	{ BEGIN PDFTRAILER; return PDFSTART; }
.|\n
}

 /* State: PDFTRAILER
  *
  * Scanning state initiated on locating a PDF file trailer; it is
  * used to subsequently initiate parsing of the trailer dictionary,
  * and to establish the starting location for its associated cross
  * reference table.
  */
<PDFTRAILER>{
"<<"			{ yy_push_state( PDFDICT ); }
"startxref"/{SEP}	{ BEGIN PDFSTARTXREF; }
{SEP}+
.
}

 /* State: PDFSTARTXREF
  *
  * Scanning state initiated after locating a startxref record within
  * a PDF file trailer; its purpose is to return the PDF file offset of
  * the associated xref data to the parser.
  */
<PDFSTARTXREF>{
{INTVAL}/{SEP}		{ xrefbase = atol( yytext ); RETURN(PDFOBJREF); }
{SEP}+
}

 /* State: PDFDICT
  *
  * Scanning state initiated on locating the opening "<<" token of any
  * PDF dictionary; here, we identify those dictionary entries which are
  * of interest, regardless of context, and switch to an appropriate new
  * start condition to handle each; (note that this lookup may be made
  * dependent on the context whence this start condition was attained,
  * by use of PDFINVOKE_IF to initiate the subsequent state switch).
  */
<PDFDICT>{
"/Root"/{PDFSEP}	{ PDFINVOKE_IF( PDFTRAILER, PDFREFER, PDFROOT ); }
"/Prev"/{PDFSEP}	{ if( yy_top_state() == PDFXREFCONT ) BEGIN PDFXREFWALK;
			  else yy_push_state( PDFIGNORE );
			}
"/Type"/{PDFSEP}	{ yy_push_state( PDFOBJTYPE ); }
"/Pages"/{PDFSEP}	{ yy_push_state( PDFREFER ); }
"/Kids"/{PDFSEP}	{ yy_push_state( PDFALLKIDS ); }
"/MediaBox"/{PDFSEP}	{ yy_push_state( PDFMEDIABOX ); }
"/"{PDFNAME}/{PDFSEP}	{ yy_push_state( PDFIGNORE ); }
">>"			{ yy_pop_state(); }
.|\n
}

 /* State: PDFOBJTYPE
  *
  * Scanning state initiated on identifying a /Type key within a PDF
  * object dictionary; it effectively causes the scanner to swallow the
  * object type designation, for those object types which we expect to
  * encounter, before reverting to the PDFDICT state, (also returning
  * a PDFOBJREF token to the parser, in the specific case when the
  * /Catalog object is identified).
  *
  * FIXME: we may need to add error reporting for detection of any
  * object type which we do not expect to encounter.
  */
<PDFOBJTYPE>{
"/Catalog"/{PDFSEP}	{ yy_pop_state(); RETURN(PDFOBJREF); }
"/Page"s?/{PDFSEP}	{ yy_pop_state(); }
{SEP}+
}

 /* States: PDFKIDS and PDFALLKIDS
  *
  * Scanning states employed to extract the first object reference from
  * a /Kids object dictionary entry.  Always entered via the PDFALLKIDS
  * state, whence the PDFREFER state is invoked to extract the first of
  * the indirect object references within the associated reference list;
  * on return, the state degrades to PDFKIDS, so causing any additional
  * references present to be ignored, before returning to the PDFDICT
  * state.
  */
<PDFALLKIDS>"["{SEP}*	{ BEGIN PDFKIDS; PDFINVOKE( PDFREFER, PDFOBJREF ); }
<PDFKIDS,PDFALLKIDS>{
"]"			{ yy_pop_state(); }
{INTVAL}/{SEP}
"R"/{PDFSEP}
{SEP}+
}

 /* State: PDFREFER
  *
  * Scanning state initiated when the anticipated PDF parsing context
  * represents a PDF object reference; it extracts the object index and
  * object version values, returning them separately to the parser, and
  * then expects, and returns the 'R' operator, before reverting to the
  * start condition whence this state was attained.
  */
<PDFREFER>{
"R"/{PDFSEP}		{ yy_pop_state(); RETURN('R'); }
{INTVAL}/{PDFSEP}	{ yylval = atol( yytext ); RETURN(VALUE); }
./({EOL}|"/")		{ yy_pop_state(); }
[ \t\r\n]+
}

 /* State: PDFMEDIABOX
  *
  * Scanning state initiated at commencement of parsing a PDF MediaBox
  * specification; after locating the opening bracket of the bounding
  * box array, control is delegated to the following PDFMEDIABOXEVAL
  * state, to capture the array values.
  */
<PDFMEDIABOX>{
"["			{ BEGIN PDFMEDIABOXEVAL; }
{SEP}+
}

 /* State: PDFMEDIABOXEVAL
  *
  * Scanning state initiated exclusively from the PDFMEDIABOX state, to
  * capture the values from the bounding box array; we require these to
  * be integers, but some applications specify them as floating point,
  * so we must be prepared to interpret either.
  */
<PDFMEDIABOXEVAL>{
[-+]?{FLOATVAL}/{PDFSEP}	{ /* PDF numeric objects may carry a leading sign;
			   * accept it here, so that it is neither echoed
			   * by the default rule, nor lost from the value.
			   * Non-integer values are rounded to nearest.
			   */
			  yylval = lround( atof( yytext )); RETURN(VALUE);
			}
[-+]?{INTVAL}/{PDFSEP}	{ yylval = atol( yytext ); RETURN(VALUE); }
"]"			{ yy_pop_state(); }
{SEP}+
}

 /* State: PDFEVAL
  *
  * Scanning state initiated when we expect an integer value token in the
  * PDF parse stream; swallow leading white space, capture the token, then
  * revert to the state whence this condition was invoked.
  */
<PDFEVAL>{
{INTVAL}/{PDFSEP}	{ yylval = atol( yytext ); yy_pop_state(); RETURN(VALUE); }
[ \t\r\n]+
}

 /* State: PDFIGNORE
  *
  * Scanning state in which all input is ignored, until the next EOL,
  * or the next PDF dictionary key, or possible dictionary terminator.
  */
<PDFIGNORE>{
./({EOL}|[/>])		{ DEBUG_ECHO; yy_pop_state(); }
.			{ yymore(); }
}

 /* State: PDFXREF
  *
  * Scanning state initiated after we have repositioned the PDF stream to
  * a point where we expect to find an "xref" table; confirm this position
  * is as expected, then delegate "xref" lookup to the following PDFGETREF
  * start condition.
  */
<PDFXREF>{
"xref"{SEP}+		{ xrefptr += yyleng; BEGIN PDFGETREF; return PDFLOOKUP; }
.|\n			{ yyerror( "in '%s'; expected 'xref', but found '%s'",
			      psbb_input_file, yytext
			    );
			  return PSBB_PARSE_FAILURE;
			}
}

 /* State: PDFGETREF
  *
  * Scanning state initiated exclusively from the PDFXREF state, after
  * verification of the "xref" parse context, to lookup the offset of the
  * PDF object with index specified in global variable "ref[0]", and with
  * generation count as specified in "ref[1]".  We begin by capturing a
  * pair of integer values, representing the base index and span for
  * the current "xref" table...
  */
<PDFGETREF>{
{INTVAL}/{SEP}		{ xrefptr += yyleng; yylval = atol( yytext ); }
{SEP}+			{ xrefptr += yyleng; RETURN(VALUE); }
}

 /* State: PDFXREFCONT
  *
  * Scanning state initiated when a specific object reference is not
  * represented within the currently accessible segment of a PDF xref
  * table; it first looks for any immediately following segment of the
  * xref table, which may include the reference, ultimately falling
  * through to the following trailer dictionary, in which case, the
  * PDFXREFPREV state is invoked, attempting to follow a /Prev link
  * to an earlier generation of the xref table.
  */
<PDFXREFCONT>{
{INTVAL}/{SEP}		{ yyless(0); BEGIN PDFGETREF; RETURN(PDFLOOKUP); }
"trailer"/{PDFSEP}	{ yy_push_state( PDFXREFPREV ); }
{SEP}+			{ xrefptr += yyleng; }
}

 /* State: PDFXREFPREV
  *
  * Scanning state initiated on fall through from the PDFXREFCONT state,
  * into the PDF trailer; it looks for the start of the trailer dictionary,
  * then switches to a PDFDICT scan to locate the /Prev key, whence the
  * PDFXREFWALK state is invoked, to follow the /Prev link.
  */
<PDFXREFPREV>{
"<<"			{ BEGIN PDFDICT; }
{SEP}+
}

 /* State: PDFXREFWALK
  *
  * Scanning state initiated after identification of the /Prev key in a
  * PDF trailer dictionary; it repositions the file input pointer to the
  * associated offset value, before restarting the PDFXREF scan.
  */
<PDFXREFWALK>{
{INTVAL}/{PDFSEP}	{ pdfseek( xrefptr = atol( yytext )); BEGIN PDFXREF; }
{SEP}+
}

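 /* State: PDFGOXREF
  *
  * Scanning state initiated after locating a PDF xref table entry for a
  * specified object; it reads the byte offset, generation number, and
  * in-use marker from that entry, and, provided that the entry is marked
  * in-use ('n') and its generation number matches "ref[1]", repositions
  * the input to the recorded offset, then switches to the PDFGETOBJECT
  * state.  Any other entry is diagnosed as a parse failure.
  */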
<PDFGOXREF>{READLN}	{ /* Parse one xref table entry, comprising a byte
			   * offset, a generation number, and an in-use
			   * marker.  The fields are given defined initial
			   * values, and the conversion count is checked, so
			   * that a malformed entry is rejected, rather than
			   * interpreted from uninitialized storage.
			   */
			  long offset = 0L, gen = 0L; char disp = '?';
			  int fields = sscanf( yytext, "%10ld %5ld %c", &offset, &gen, &disp );
			  DEBUG_MSG(("%d: %.18s --> %ld; %ld %c\n", YYSTATE, yytext, offset, gen, disp));
			  if( fields == 3 && disp == 'n' && gen == ref[1] )
			  { pdfseek( offset ); BEGIN PDFGETOBJECT;
			  }
			  else
			  { yyerror( "index entry '%.18s' unexpected in file '%s'",
			        yytext, psbb_input_file
			      );
			    return PSBB_PARSE_FAILURE;
			  }
			}

 /* State: PDFGETOBJECT
  *
  * Scanning state initiated when the PDF input pointer has been set
  * to the start of a specific object; it returns the associated object
  * identification tokens to the parser, for confirmation of expected
  * object identity, before switching to the PDFSCANOBJECT state, to
  * scan the associated object data.
  */
<PDFGETOBJECT>{
"obj"/{PDFSEP}		{ BEGIN PDFSCANOBJECT; RETURN(PDFOBJECT); }
{INTVAL}/{SEP}		{ yylval = atol( yytext ); RETURN(VALUE); }
{SEP}+
}

 /* State: PDFSCANOBJECT
  *
  * Scanning state initiated when scanning PDF object data; effectively,
  * it ignores all content, up to the terminating "endobj" token, except
  * for the content of any embedded object dictionary, which is scanned
  * in the PDFDICT state.
  */
<PDFSCANOBJECT>{
{SEP}*"<<"		{ yy_push_state( PDFDICT ); }
"endobj"/{SEP}		{ DEBUG_ECHO; RETURN(PDFENDOBJ); }
(.|\n)
}

 /* State: UNKNOWN
  *
  * Finally, the UNKNOWN scanning state is activated when the INITIAL scan
  * of the first input line fails to recognize the file signature; it causes
  * the lexer to bail out immediately.
  */
<UNKNOWN>[^\r\n]*	{ yyerror( "unknown file signature '%s' in file '%s'",
			      yytext, psbb_input_file
			    );
			  return PSBB_PARSE_FAILURE;
			}
%%
/* General code section: this provides the implementation for the
 * parser and lexical analyser API, servicing groff's psbb request.
 */
int psbb_parser_status_check;
void psbb_get_bounding_box( const char *source )
{
  /* This is the primary entry point for the parser/lexer combination;
   * it sets up the specified source file as the lexer input, then
   * invokes the parser.  The outcome is left in the global variable
   * psbb_parser_status_check: EOF if the source file could not be
   * opened, otherwise the parser's return value combined (bitwise OR)
   * with any failure status recorded by the lexer.
   */
  int parse_result;

  psbb_parser_status_check = EOF;
  if( (yyin = psbb_open_file_for_parse( psbb_input_file = source )) != NULL )
  {
    /* ...when successful, forces the lexer to enter its initial state,
     * and invokes the parser to process the sequence of tokens which the
     * lexer returns.
     */
    psbb_parse_status = 0;
    psbb_phase = PSBB_PHASE_INIT;

    /* The operands of "|" may be evaluated in either order, and the
     * lexer updates psbb_parse_status while yyparse() is running; run
     * the parser to completion first, and only then read the status.
     */
    parse_result = yyparse();
    psbb_parser_status_check = parse_result | psbb_parse_status;
    yy_flush_buffer( YY_CURRENT_BUFFER );
  }
}

static int psbb_crescendo_seek()
{
  /* Search backwards from the end of the input file, for any pattern
   * which causes a recursive invocation of the lexer to return a token
   * greater than zero.  The first attempt scans only the final 64 bytes
   * of the file; the scanned block is doubled in size on each further
   * attempt.  Returns zero as soon as an attempt succeeds, or EOF if
   * no attempt does.
   */
  ssize_t block = 64L;

  while( block > 0L )
  {
    int seek_status;

    /* Discard buffered input, then position the stream at the start
     * of the block to be scanned.
     */
    yy_flush_buffer( YY_CURRENT_BUFFER );
    seek_status = fseek( yyin, -block, SEEK_END );

    if( seek_status != 0 )
    {
      /* The block now overruns the start of the file; make one "last
       * chance" scan of the entire file.  Zeroing the block size
       * ensures that the loop ends after this attempt, since doubling
       * zero leaves it at zero.
       */
      block = 0L;
      seek_status = fseek( yyin, block, SEEK_SET );
    }

    /* Scan from the chosen position; any token greater than zero
     * indicates that the pattern sought has been found.
     */
    if( (seek_status == 0) && (yylex() > 0) )
      return 0;

    block <<= 1;
  }
  return EOF;
}

void psbb_locate( int index, int generation )
{
  /* Invoked by the parser, on completion of a PDFOBJREF token
   * sequence, i.e. either of:--
   *
   *   PDFOBJREF VALUE VALUE 'R', or
   *   VALUE VALUE 'R' PDFOBJREF
   *
   * to record the identity of the PDF object which is to be
   * located: its index in ref[0], its generation in ref[1].
   */
  ref[1] = generation;
  ref[0] = index;
}

void psbb_walk( void )
{
  /* Helper function, invoked by the parser when processing
   * a root PDFOBJREF token, or PDFENDOBJ token, to walk the
   * chain of PDF object references from the document root,
   * until the first leaf node, (nominally expected to be
   * the first /Page object), has been located.
   */
  if( ref[0] <= 0 )
  {
    /* No child object reference is pending, so the object
     * just parsed is a leaf node: discard any data left in
     * the input buffer, and move to the end of the file, so
     * that the next input operation will see EOF.
     */
    yy_flush_buffer( YY_CURRENT_BUFFER );
    fseek( yyin, 0, SEEK_END );
    return;
  }

  /* A child object reference is pending: restart the xref
   * table search, from the base of the table, to locate and
   * process that object.
   */
  BEGIN PDFXREF;
  xrefptr = xrefbase;
  pdfseek( xrefptr );
}

void psbb_lookup( int base, int span )
{
  /* A helper function, invoked (possibly iteratively) by
   * the lexer, as a callback via the parser, during the
   * sequence of start conditions initiated from PDFXREF,
   * while handling a psbb_locate() request, to retrieve
   * a possible xref table entry for the object identified
   * by global index ref[0], within a section of the table
   * representing span objects, contiguously numbered from
   * the specified base index.
   */

  /* Each entry in a PDF xref table section occupies a fixed
   * number of bytes, which allows direct offset computation.
   */
  enum { XREF_ENTRY_SIZE = 20 };

  if( (ref[0] >= base) && (ref[0] < (base + span)) )
  {
    /* The required xref entry lies within the span of the
     * xref table section at the current xrefptr offset; we
     * simply seek to the start of the entry required, and
     * follow the reference.
     */
    pdfseek( xrefptr + XREF_ENTRY_SIZE * (ref[0] - base) );
    BEGIN PDFGOXREF;
  }
  else
  { /* The required xref entry is NOT accessible from the
     * xref table section at the current xrefptr offset; we
     * move the xrefptr just beyond the current section of
     * the table, then switch to the transient PDFXREFCONT
     * state, to search in any subsequent section of the
     * table, or to follow any /Prev link to an earlier
     * generation of it.
     */
    pdfseek( xrefptr += XREF_ENTRY_SIZE * span );
    BEGIN PDFXREFCONT;
  }

  /* xrefptr is a size_t; convert it explicitly, to match the
   * "%lu" conversion specification, (passing it for "%d" is
   * undefined).
   */
  DEBUG_MSG(("%d: lookup object #%d @ %lu within %d..%d\n",
      YYSTATE, ref[0], (unsigned long)xrefptr, base, base + span
    ));
}

static int
pdf_object_lookup_failed( const char *desc, int wanted, int found )
{
  /* Report a PDF object lookup mismatch, on behalf of psbb_chkref();
   * "desc" names the property which did not match, "wanted" is the
   * value requested, and "found" is the value actually read from the
   * file.  The parse is marked as failed, and zero is returned.
   */
  yyerror(
      "object reference mismatch in '%s': expected %s %d but found %d",
      psbb_input_file, desc, wanted, found
    );

  return PSBB_PARSE_FAILURE;
}

int psbb_chkref( int obj, int gen )
{
  /* Invoked by the parser, to verify that the object just located
   * is the one which was requested: "obj" and "gen" are the object
   * index and generation number read from the file, which must match
   * ref[0] and ref[1] respectively.  On a match, ref[0] is reset to
   * zero; otherwise the mismatch is diagnosed, with the object index
   * being checked before the generation number.
   */
  if( (obj == ref[0]) && (gen == ref[1]) )
  {
    DEBUG_MSG(("%d: object: %d; generation = %d\n", YYSTATE, obj, gen));
    ref[0] = 0;
    return ref[0];
  }

  return (obj != ref[0])
    ? pdf_object_lookup_failed( "object", ref[0], obj )
    : pdf_object_lookup_failed( "generation", ref[1], gen );
}

/* vim: set cin fo=croqj: */
