Legofit
infers population history from nucleotide site patterns.
Macros | Functions | Variables
parse.c File Reference

Parse a .lgo file. More...

#include "error.h"
#include "lblndx.h"
#include "misc.h"
#include "network.h"
#include "param.h"
#include "parse.h"
#include "parstore.h"
#include "ptrqueue.h"
#include "sampndx.h"
#include "strptrmap.h"
#include <assert.h>
#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

Macros

#define CHECK_TOKEN(tok, orig)
 Abort if token is missing. More...
 
#define ILLEGAL_INPUT(x, orig)
 Abort with an error message about illegal input. More...
 
#define DUPLICATE_PAR(x, orig)
 Abort with an error message about duplicate parameter definition. More...
 
#define ORDER_ERROR(word)
 Input statements out of order. More...
 

Functions

int getDbl (double *x, char **next, const char *orig)
 Interpret the next token as a double. More...
 
int getULong (unsigned long *x, char **next, const char *orig)
 Interpret the next token as an unsigned long integer. More...
 
int getRange (double x[2], char **next, const char *orig)
 Read a range in form "[ 12, 34 ]". More...
 
void parseParam (char *next, unsigned ptype, StrPtrMap *parmap, PtrQueue *fixedQ, PtrQueue *freeQ, PtrQueue *constrQ, Bounds *bnd, const char *orig)
 Parse a line of input defining a parameter. More...
 
void parseSegment (char *next, StrPtrMap *popmap, SampNdx *sndx, LblNdx *lndx, ParStore *parstore, const char *orig)
 Parse a line describing a segment of the population tree. More...
 
void parseDerive (char *next, StrPtrMap *popmap, ParStore *parstore, const char *orig)
 Parse a line of input describing a parent-offspring relationship between two nodes. More...
 
void parseMix (char *next, StrPtrMap *popmap, ParStore *parstore, const char *orig)
 Parse a line of input describing gene flow. More...
 
static int get_one_line (size_t n, char buff[n], FILE *fp)
 
PtrPair mktree (FILE *fp, SampNdx *sndx, LblNdx *lndx, Bounds *bnd)
 Parse an input file in .lgo format. More...
 
int countSegments (FILE *fp)
 Count the number of "segment" statements in input file.
 

Variables

tipId_t union_all_samples
 

Detailed Description

Parse a .lgo file.

Author
Alan R. Rogers

Consider the following tree of populations:

     a-------|
             |ab--|
     b--|bb--|    |
        |         |abc--
        |c--------|

 t = 0  1    3    5.5     inf

In this tree, a, b, c, bb, ab, and abc represent segments of the population tree. The input file begins with a series of "segment" statements that define each of the segments in the tree. The segment statements also provide the time (backwards from the present in generations) at which the segment starts, and the size, twoN, of the population. Optionally, it also provides the number of haploid samples observed in this segment of the tree.

The statements that follow the segment statements describe how the segments are connected. The "mix" statement is used when a segment originates as a mixture of two ancestral segments. The "derive" statement is used when a segment derives from a single ancestral segment.

No segment can have more than two "parents" or more than two "children".

Here is input that would generate the tree above:

time fixed T0=0
time free Tc=1
time free Tab=3
time free Tabc=5.5
twoN free twoNa=100
twoN fixed twoNb=123
twoN free twoNc=213.4
twoN fixed twoNbb=32.1
twoN free twoNab=222
twoN fixed twoNabc=1.2e2
mixFrac free Mc=0.8
segment a t=T0 twoN=twoNa samples=1
segment b t=T0 twoN=twoNb samples=2
segment c t=Tc twoN=twoNc samples=1
segment bb t=Tc twoN=twoNbb
segment ab t=Tab twoN=twoNab
segment abc t=Tabc twoN=twoNabc
mix b from bb + Mc * c
derive a from ab
derive bb from ab
derive ab from abc
derive c from abc

Macro Definition Documentation

◆ CHECK_TOKEN

#define CHECK_TOKEN (   tok,
  orig 
)
Value:
{ \
if((tok) == NULL) { \
fprintf(stderr, "%s:%d:" \
" input line incomplete in .lgo file.\n", \
__FILE__,__LINE__); \
fprintf(stderr," input: %s\n", (orig)); \
exit(EXIT_FAILURE); \
} \
}while(0);

Abort if token is missing.

◆ DUPLICATE_PAR

#define DUPLICATE_PAR (   x,
  orig 
)
Value:
do{ \
fprintf(stderr,"%s:%d: Duplicate parameter def: \"%s\"\n", \
__FILE__,__LINE__, (x)); \
fprintf(stderr," input: %s\n", (orig)); \
exit(EXIT_FAILURE); \
}while(0)

Abort with an error message about duplicate parameter definition.

◆ ILLEGAL_INPUT

#define ILLEGAL_INPUT (   x,
  orig 
)
Value:
do{ \
fprintf(stderr,"%s:%d: Illegal input: \"%s\"\n", \
__FILE__,__LINE__, (x)); \
fprintf(stderr," input: %s\n", (orig)); \
exit(EXIT_FAILURE); \
}while(0)

Abort with an error message about illegal input.

◆ ORDER_ERROR

#define ORDER_ERROR (   word)
Value:
do{ \
fprintf(stderr,"%s:%d: Order error at \"%s\" in .lgo file.\n", \
__FILE__,__LINE__, (word)); \
fprintf(stderr,"Parameter definitions should come first\n" \
" (twoN, time, mixFrac, and param),\n" \
" then \"segment\" statements, and finally \"mix\"\n" \
" and \"derive\" statements.\n"); \
exit(EXIT_FAILURE); \
}while(0)

Input statements out of order.

Function Documentation

◆ getDbl()

int getDbl ( double *  x,
char **  next,
const char *  orig 
)

Interpret the next token as a double.

Parameters
[out] x: points to variable into which double value will be placed
[in,out] next: points to unparsed portion of input line

◆ getRange()

int getRange ( double  x[2],
char **  next,
const char *  orig 
)

Read a range in form "[ 12, 34 ]".

Return 0 on success or 1 if range is not present. Abort if first character is "[" but the rest of the string is not interpretable as a range.

Referenced by parseParam().

◆ getULong()

int getULong ( unsigned long *  x,
char **  next,
const char *  orig 
)

Interpret the next token as an unsigned long integer.

Parameters
[out] x: points to variable into which value will be placed
[in,out] next: points to unparsed portion of input line

Referenced by parseSegment().

◆ mktree()

PtrPair mktree ( FILE *  fp,
SampNdx *  sndx,
LblNdx *  lndx,
Bounds *  bnd 
)

Parse an input file in .lgo format.

Parameters
[in,out] fp: input file pointer
[in,out] sndx: associates the index of each sample with the node to which it belongs.
[in,out] lndx: associates the index of each sample with its name
[out] parstore: structure that maintains info about parameters
[in] bnd: the bounds of each type of parameter

Check the sanity of each node and make sure there is only one root.

References StrPtrMap_new().

◆ parseDerive()

void parseDerive ( char *  next,
StrPtrMap *  popmap,
ParStore *  parstore,
const char *  orig 
)

Parse a line of input describing a parent-offspring relationship between two nodes.

Parameters
[in] next: unparsed portion of input line
[in,out] popmap: associates names of segments with pointers to them.

References CHECK_TOKEN, nextWhitesepToken(), stripWhiteSpace(), and StrPtrMap_get().

◆ parseMix()

void parseMix ( char *  next,
StrPtrMap *  popmap,
ParStore *  parstore,
const char *  orig 
)

Parse a line of input describing gene flow.

Parameters
[in,out] next: unparsed portion of input line
[in,out] popmap: associates names of segments with pointers to them.
[out] parstore: structure that maintains info about parameters

References CHECK_TOKEN, nextWhitesepToken(), ParStore_getIndex(), stripWhiteSpace(), and StrPtrMap_get().

◆ parseParam()

void parseParam ( char *  next,
unsigned  ptype,
StrPtrMap *  parmap,
PtrQueue *  fixedQ,
PtrQueue *  freeQ,
PtrQueue *  constrQ,
Bounds *  bnd,
const char *  orig 
)

Parse a line of input defining a parameter.

Parameters
[in,out] next: points to unparsed portion of input line
[in] ptype: TWON, TIME, or MIXFRAC
[out] parstore: structure that maintains info about parameters
[in] bnd: the bounds of each type of parameter
[in] orig: original input line

References CHECK_TOKEN, getRange(), and nextWhitesepToken().

◆ parseSegment()

void parseSegment ( char *  next,
StrPtrMap *  popmap,
SampNdx *  sndx,
LblNdx *  lndx,
ParStore *  parstore,
const char *  orig 
)

Parse a line describing a segment of the population tree.

Parameters
[in,out] next: pointer to unparsed portion of input line
[in,out] popmap: associates names of segments with pointers to them.
[in,out] sndx: associates the index of each sample with the node to which it belongs.
[in,out] lndx: associates the index of each sample with its name
[out] parstore: structure that maintains info about parameters

References CHECK_TOKEN, getULong(), LblNdx_addSamples(), nextWhitesepToken(), ParStore_getIndex(), SampNdx_addSamples(), stripWhiteSpace(), and StrPtrMap_insert().