Legofit
infers population history from nucleotide site patterns.
|
Parse a .lgo file. More...
#include "error.h"
#include "lblndx.h"
#include "misc.h"
#include "network.h"
#include "param.h"
#include "parse.h"
#include "parstore.h"
#include "ptrqueue.h"
#include "sampndx.h"
#include "strptrmap.h"
#include <assert.h>
#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
Macros | |
#define | CHECK_TOKEN(tok, orig) |
Abort if token is missing. More... | |
#define | ILLEGAL_INPUT(x, orig) |
Abort with an error message about illegal input. More... | |
#define | DUPLICATE_PAR(x, orig) |
Abort with an error message about duplicate parameter definition. More... | |
#define | ORDER_ERROR(word) |
Input statements out of order. More... | |
Functions | |
int | getDbl (double *x, char **next, const char *orig) |
Interpret the next token as a double. More... | |
int | getULong (unsigned long *x, char **next, const char *orig) |
Interpret the next token as an unsigned long integer. More... | |
int | getRange (double x[2], char **next, const char *orig) |
Read a range in form "[ 12, 34 ]". More... | |
void | parseParam (char *next, unsigned ptype, StrPtrMap *parmap, PtrQueue *fixedQ, PtrQueue *freeQ, PtrQueue *constrQ, Bounds *bnd, const char *orig) |
Parse a line of input defining a parameter. More... | |
void | parseSegment (char *next, StrPtrMap *popmap, SampNdx *sndx, LblNdx *lndx, ParStore *parstore, const char *orig) |
Parse a line describing a segment of the population tree. More... | |
void | parseDerive (char *next, StrPtrMap *popmap, ParStore *parstore, const char *orig) |
Parse a line of input describing a parent-offspring relationship between two nodes. More... | |
void | parseMix (char *next, StrPtrMap *popmap, ParStore *parstore, const char *orig) |
Parse a line of input describing gene flow. More... | |
static int | get_one_line (size_t n, char buff[n], FILE *fp) |
PtrPair | mktree (FILE *fp, SampNdx *sndx, LblNdx *lndx, Bounds *bnd) |
Parse an input file in .lgo format. More... | |
int | countSegments (FILE *fp) |
Count the number of "segment" statements in input file. | |
Variables | |
tipId_t | union_all_samples |
Parse a .lgo file.
Consider the following tree of populations:
     a-------|
             |ab--|
     b--|bb--|    |
        |         |abc--
        |c--------|

 t = 0  1    3    5.5     inf
In this tree, a, b, c, bb, ab, and abc represent segments of the population tree. The input file begins with a series of "segment" statements that define each of the segments in the tree. The segment statements also provide the time (backwards from the present in generations) at which the segment starts, and the size, twoN, of the population. Optionally, they also provide the number of haploid samples observed in this segment of the tree.
The statements that follow the segment statements describe how the segments are connected. The "mix" statement is used when a segment originates as a mixture of two ancestral segments. The "derive" statement is used when a segment derives from a single ancestral segment.
No segment can have more than two "parents" or more than two "children".
Here is input that would generate the tree above:
time fixed    T0=0
time free     Tc=1
time free     Tab=3
time free     Tabc=5.5
twoN free     twoNa=100
twoN fixed    twoNb=123
twoN free     twoNc=213.4
twoN fixed    twoNbb=32.1
twoN free     twoNab=222
twoN fixed    twoNabc=1.2e2
mixFrac free  Mc=0.8
segment a     t=T0     twoN=twoNa    samples=1
segment b     t=T0     twoN=twoNb    samples=2
segment c     t=Tc     twoN=twoNc    samples=1
segment bb    t=Tc     twoN=twoNbb
segment ab    t=Tab    twoN=twoNab
segment abc   t=Tabc   twoN=twoNabc
mix    b  from bb + Mc * c
derive a  from ab
derive bb from ab
derive ab from abc
derive c  from abc
#define CHECK_TOKEN | ( | tok, | |
orig | |||
) |
Abort if token is missing.
#define DUPLICATE_PAR | ( | x, | |
orig | |||
) |
Abort with an error message about duplicate parameter definition.
#define ILLEGAL_INPUT | ( | x, | |
orig | |||
) |
Abort with an error message about illegal input.
#define ORDER_ERROR | ( | word | ) |
Input statements out of order.
int getDbl | ( | double * | x, |
char ** | next, | ||
const char * | orig | ||
) |
Interpret the next token as a double.
[out] | x | points to variable into which double value will be placed |
[in,out] | next | points to unparsed portion of input line |
int getRange | ( | double | x[2], |
char ** | next, | ||
const char * | orig | ||
) |
Read a range in form "[ 12, 34 ]".
Return 0 on success or 1 if range is not present. Abort if first character is "[" but the rest of the string is not interpretable as a range.
Referenced by parseParam().
int getULong | ( | unsigned long * | x, |
char ** | next, | ||
const char * | orig | ||
) |
Interpret the next token as an unsigned long integer.
[out] | x | points to variable into which value will be placed |
[in,out] | next | points to unparsed portion of input line |
Referenced by parseSegment().
Parse an input file in .lgo format.
[in,out] | fp | input file pointer |
[in,out] | sndx | associates the index of each sample with the node to which it belongs. |
[in,out] | lndx | associates the index of each sample with its name |
[out] | parstore | structure that maintains info about parameters |
[in] | bnd | the bounds of each type of parameter |
Check the sanity of each node and make sure there is only one root.
References StrPtrMap_new().
Parse a line of input describing a parent-offspring relationship between two nodes.
[in] | next | unparsed portion of input line |
[in,out] | popmap | associates names of segments with pointers to them. |
References CHECK_TOKEN, nextWhitesepToken(), stripWhiteSpace(), and StrPtrMap_get().
Parse a line of input describing gene flow.
[in,out] | next | unparsed portion of input line |
[in,out] | popmap | associates names of segments with pointers to them. |
[out] | parstore | structure that maintains info about parameters |
References CHECK_TOKEN, nextWhitesepToken(), ParStore_getIndex(), stripWhiteSpace(), and StrPtrMap_get().
void parseParam | ( | char * | next, |
unsigned | ptype, | ||
StrPtrMap * | parmap, | ||
PtrQueue * | fixedQ, | ||
PtrQueue * | freeQ, | ||
PtrQueue * | constrQ, | ||
Bounds * | bnd, | ||
const char * | orig | ||
) |
Parse a line of input defining a parameter.
[in,out] | next | points to unparsed portion of input line |
[in] | ptype | TWON, TIME, or MIXFRAC |
[out] | parstore | structure that maintains info about parameters |
[in] | bnd | the bounds of each type of parameter |
[in] | orig | original input line |
References CHECK_TOKEN, getRange(), and nextWhitesepToken().
void parseSegment | ( | char * | next, |
StrPtrMap * | popmap, | ||
SampNdx * | sndx, | ||
LblNdx * | lndx, | ||
ParStore * | parstore, | ||
const char * | orig | ||
) |
Parse a line describing a segment of the population tree.
[in,out] | next | pointer to unparsed portion of input line |
[in,out] | popmap | associates names of segments with pointers to them. |
[in,out] | sndx | associates the index of each sample with the node to which it belongs. |
[in,out] | lndx | associates the index of each sample with its name |
[out] | parstore | structure that maintains info about parameters |
References CHECK_TOKEN, getULong(), LblNdx_addSamples(), nextWhitesepToken(), ParStore_getIndex(), SampNdx_addSamples(), stripWhiteSpace(), and StrPtrMap_insert().