Colibri Core
Public Types | Public Member Functions | Public Attributes | Protected Member Functions | Protected Attributes | List of all members
PatternModel< ValueType, ValueHandler, MapType, PatternType > Class Template Reference

A model mapping patterns to values, high-level interface. More...

#include <patternmodel.h>

Inheritance diagram for PatternModel< ValueType, ValueHandler, MapType, PatternType >:
PatternModelInterface PatternStoreInterface PatternStoreInterface

Public Types

typedef MapType::iterator iterator
 
typedef MapType::const_iterator const_iterator
 

Public Member Functions

 PatternModel (IndexedCorpus *corpus=NULL)
 
 PatternModel (std::istream *f, PatternModelOptions options, PatternModelInterface *constrainmodel=NULL, IndexedCorpus *corpus=NULL)
 
 ~PatternModel ()
 
 PatternModel (const std::string &filename, const PatternModelOptions &options, PatternModelInterface *constrainmodel=NULL, IndexedCorpus *corpus=NULL)
 
virtual int getmodeltype () const
 
virtual int getmodelversion () const
 
virtual size_t size () const
 
virtual bool has (const Pattern &pattern) const
 
virtual bool has (const PatternPointer &pattern) const
 
virtual void load (std::string &filename, const PatternModelOptions &options, PatternModelInterface *constrainmodel=NULL)
 
virtual void load (std::istream *f, const PatternModelOptions &options, PatternModelInterface *constrainmodel=NULL)
 
PatternModelInterface * getinterface ()
 
virtual void train (std::istream *in, PatternModelOptions options, PatternModelInterface *constrainbymodel=NULL, bool continued=false, uint32_t firstsentence=1, bool ignoreerrors=false)
 
virtual void train (const std::string &filename, PatternModelOptions options, PatternModelInterface *constrainbymodel=NULL, bool continued=false, uint32_t firstsentence=1, bool ignoreerrors=false)
 
virtual int computeskipgrams (const PatternPointer &pattern, int mintokens=2, const IndexReference *singleref=NULL, const IndexedData *multiplerefs=NULL, PatternModelInterface *constrainbymodel=NULL, std::vector< PatternPointer > *targetcontainer=NULL, const bool exhaustive=false, const int maxskips=3, const bool DEBUG=false)
 
virtual int computeskipgrams (const PatternPointer &pattern, PatternModelOptions &options, const IndexReference *singleref=NULL, const IndexedData *multiplerefs=NULL, PatternModelInterface *constrainbymodel=NULL, const bool exhaustive=false)
 
virtual std::vector< PatternPointer > findskipgrams (const PatternPointer &pattern, unsigned int occurrencethreshold=1, int maxskips=3)
 
virtual void trainskipgrams (const PatternModelOptions options, PatternModelInterface *constrainbymodel=NULL)
 
void test (MapType &target, std::istream *in)
 
void write (std::ostream *out)
 
void write (const std::string filename)
 
virtual int maxlength () const
 
virtual int minlength () const
 
virtual unsigned int occurrencecount (const Pattern &pattern)
 
virtual unsigned int occurrencecount (const PatternPointer &pattern)
 
virtual ValueType * getdata (const Pattern &pattern, bool makeifnew=false)
 
virtual ValueType * getdata (const PatternPointer &pattern, bool makeifnew=false)
 
virtual unsigned int types ()
 
virtual unsigned int tokens () const
 
unsigned char type () const
 
unsigned char version () const
 
void output (std::ostream *)
 
unsigned int coveragecount (const Pattern &key)
 
double coverage (const Pattern &key)
 
std::vector< PatternPointer > getreverseindex (const IndexReference ref, int occurrencecount=0, int category=0, unsigned int size=0)
 
std::vector< std::pair< IndexReference, PatternPointer > > getreverseindex_bysentence (int sentence)
 
std::vector< std::pair< IndexReference, PatternPointer > > getreverseindex_right (const IndexReference ref)
 
std::vector< std::pair< IndexReference, PatternPointer > > getreverseindex_left (const IndexReference ref)
 
void computestats ()
 
virtual void resetstats ()
 
virtual void computecoveragestats (int category=0, int n=0)
 
unsigned int totaloccurrencesingroup (int category, int n)
 
unsigned int totalpatternsingroup (int category, int n)
 
unsigned int totalwordtypesingroup (int category, int n)
 
unsigned int totaltokensingroup (int category, int n)
 
double frequency (const Pattern &pattern)
 
virtual void add (const PatternPointer &patternpointer, const IndexReference &ref)
 
virtual void add (const Pattern &pattern, ValueType *value, const IndexReference &ref)
 
virtual void add (const PatternPointer &pattern, ValueType *value, const IndexReference &ref)
 
unsigned int prune (int threshold, int _n=0)
 
virtual unsigned int pruneskipgrams (unsigned int threshold, int minskiptypes=2, int _n=0)
 
unsigned int prunenotinset (const std::unordered_set< Pattern > &s, int _n)
 
template<class ValueType2 , class ValueHandler2 , class MapType2 >
unsigned int prunebymodel (PatternModel< ValueType2, ValueHandler2, MapType2 > &secondmodel)
 
std::vector< std::pair< Pattern, int > > getpatterns (const Pattern &pattern)
 
virtual void print (std::ostream *out, ClassDecoder &decoder)
 
virtual void printreverseindex (std::ostream *out, ClassDecoder &decoder)
 
void printmodel (std::ostream *out, ClassDecoder &decoder)
 
virtual void print (std::ostream *out, ClassDecoder &decoder, const PatternType &pattern, bool endline=true)
 
void printpattern (std::ostream *out, ClassDecoder &decoder, const Pattern &pattern, bool endline=true)
 
void histogram (std::map< unsigned int, unsigned int > &hist, unsigned int threshold=0, unsigned int cap=0, int category=0, unsigned int size=0)
 
unsigned int topthreshold (int amount, int category=0, int size=0)
 
void histogram (std::ostream *OUT, unsigned int threshold=0, unsigned int cap=0, int category=0, unsigned int size=0)
 
void info (std::ostream *OUT)
 
void report (std::ostream *OUT)
 
PatternSet< uint64_t > extractset (int minlength=1, int maxlength=1)
 
virtual void outputrelations (const Pattern &pattern, ClassDecoder &classdecoder, std::ostream *OUT)
 
virtual t_relationmap getsubchildren (const Pattern &pattern, int=0, int=0, int=0)
 
virtual t_relationmap getsubparents (const Pattern &pattern, int=0, int=0, int=0)
 
virtual t_relationmap gettemplates (const Pattern &pattern, int=0)
 
virtual t_relationmap getinstances (const Pattern &pattern, int=0)
 
virtual t_relationmap getskipcontent (const PatternPointer &pattern)
 
virtual t_relationmap getleftneighbours (const Pattern &pattern, int=0, int=0, int=0, int=0)
 
virtual t_relationmap getrightneighbours (const Pattern &pattern, int=0, int=0, int=0, int=0)
 
virtual t_relationmap_double getnpmi (const Pattern &pattern, double threshold)
 
virtual int computeflexgrams_fromskipgrams ()
 
virtual int computeflexgrams_fromcooc ()
 
virtual void outputcooc_npmi (std::ostream *OUT, ClassDecoder &classdecoder, double threshold)
 
virtual void outputcooc (std::ostream *OUT, ClassDecoder &classdecoder, double threshold)
 
- Public Member Functions inherited from PatternModelInterface
virtual PatternStoreInterface * getstoreinterface ()
 
virtual PatternStoreInterface * getstoreinterface ()
 

Public Attributes

IndexedCorpus * reverseindex
 Pointer to the reverse index and corpus data for this model (or NULL) More...
 
bool reverseindex_internal
 
bool hasskipgrams
 Does this model have skipgrams? More...
 

Protected Member Functions

virtual void postread (const PatternModelOptions options)
 
virtual void posttrain (const PatternModelOptions options)
 

Protected Attributes

unsigned char model_type
 
unsigned char model_version
 
uint64_t totaltokens
 Total number of tokens in the original corpus, so INCLUDES TOKENS NOT COVERED BY THE MODEL! More...
 
uint64_t totaltypes
 Total number of unigram/word types in the original corpus, SO INCLUDING NOT COVERED BY THE MODEL! More...
 
int maxn
 
int minn
 
std::set< int > cache_categories
 
std::set< int > cache_n
 
std::map< int, std::map< int, unsigned int > > cache_grouptotal
 total occurrences (used for frequency computation, within a group) More...
 
std::map< int, std::map< int, unsigned int > > cache_grouptotalpatterns
 total distinct patterns per group More...
 
std::map< int, std::map< int, unsigned int > > cache_grouptotalwordtypes
 total covered word types per group More...
 
std::map< int, std::map< int, unsigned int > > cache_grouptotaltokens
 total covered tokens per group More...
 
std::map< int, std::vector< uint32_t > > gapmasks
 pre-computed masks representing possible gap configurations for various pattern lengths More...
 

Detailed Description

template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
class PatternModel< ValueType, ValueHandler, MapType, PatternType >

A model mapping patterns to values, high-level interface.

Template Parameters
ValueType: The type of Value this model stores
ValueHandler: A handler class for this type of value
MapType: The type of container to use

Member Typedef Documentation

template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
typedef MapType::const_iterator PatternModel< ValueType, ValueHandler, MapType, PatternType >::const_iterator
template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
typedef MapType::iterator PatternModel< ValueType, ValueHandler, MapType, PatternType >::iterator

Constructor & Destructor Documentation

template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
PatternModel< ValueType, ValueHandler, MapType, PatternType >::PatternModel ( IndexedCorpus corpus = NULL)
inline

Begin a new pattern model, optionally pre-setting a reverseindex.

template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
PatternModel< ValueType, ValueHandler, MapType, PatternType >::PatternModel ( std::istream *  f,
PatternModelOptions  options,
PatternModelInterface constrainmodel = NULL,
IndexedCorpus corpus = NULL 
)
inline

Read a pattern model from an input stream

Parameters
f: The input stream
options: Options for reading, these act as a filter for the data, allowing you to raise thresholds etc
constrainmodel: Pointer to another pattern model which should be used to constrain the loading of this one, only patterns also occurring in the other model will be included. Defaults to NULL (no constraining)
corpus: Pointer to the loaded corpus, used as a reverse index.
template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
PatternModel< ValueType, ValueHandler, MapType, PatternType >::~PatternModel ( )
inline
template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
PatternModel< ValueType, ValueHandler, MapType, PatternType >::PatternModel ( const std::string &  filename,
const PatternModelOptions options,
PatternModelInterface constrainmodel = NULL,
IndexedCorpus corpus = NULL 
)
inline

Read a pattern model from file

Parameters
filename: The input filename
options: Options for reading, these act as a filter for the data, allowing you to raise thresholds etc
constrainmodel: Pointer to another pattern model which should be used to constrain the loading of this one, only patterns also occurring in the other model will be included. Defaults to NULL (no constraining)
corpus: Pointer to the loaded corpus, used as a reverse index.

Member Function Documentation

template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
virtual void PatternModel< ValueType, ValueHandler, MapType, PatternType >::add ( const PatternPointer patternpointer,
const IndexReference ref 
)
inlinevirtual

Add a pattern, with a given position, to the model. This is called during training every time an instance of a pattern is found in the data. This is the high-level version.

Parameters
patternpointer: The pattern to add (a PatternPointer)
ref: The position in the corpus where the pattern occurs

Reimplemented in IndexedPatternPointerModel< MapType >, and PatternPointerModel< ValueType, ValueHandler, MapType >.

template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
virtual void PatternModel< ValueType, ValueHandler, MapType, PatternType >::add ( const Pattern pattern,
ValueType *  value,
const IndexReference ref 
)
inlinevirtual

Add a pattern, with a given position, and a value to the model. This is called during training every time an instance of a pattern is found in the data. This is the low-level version.

Parameters
pattern: The pattern to add
value: A pointer to the value for this pattern, what kind of value depends on the ValueType template parameter.
ref: The position in the corpus where the pattern occurs

Reimplemented in IndexedPatternModel< MapType, PatternType >, and IndexedPatternModel< MapType, PatternPointer >.

template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
virtual void PatternModel< ValueType, ValueHandler, MapType, PatternType >::add ( const PatternPointer pattern,
ValueType *  value,
const IndexReference ref 
)
inlinevirtual
template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
virtual void PatternModel< ValueType, ValueHandler, MapType, PatternType >::computecoveragestats ( int  category = 0,
int  n = 0 
)
inlinevirtual

Compute coverage statistics on the model, will generally be called automatically by methods that use it, and the statistics are cached after computation.

Reimplemented in IndexedPatternModel< MapType, PatternType >, and IndexedPatternModel< MapType, PatternPointer >.

template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
virtual int PatternModel< ValueType, ValueHandler, MapType, PatternType >::computeflexgrams_fromcooc ( )
inlinevirtual
template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
virtual int PatternModel< ValueType, ValueHandler, MapType, PatternType >::computeflexgrams_fromskipgrams ( )
inlinevirtual
template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
virtual int PatternModel< ValueType, ValueHandler, MapType, PatternType >::computeskipgrams ( const PatternPointer pattern,
int  mintokens = 2,
const IndexReference singleref = NULL,
const IndexedData multiplerefs = NULL,
PatternModelInterface constrainbymodel = NULL,
std::vector< PatternPointer > *  targetcontainer = NULL,
const bool  exhaustive = false,
const int  maxskips = 3,
const bool  DEBUG = false 
)
inlinevirtual

Low-level function to compute skipgrams for a given pattern. See the higher-level function trainskipgrams() instead.

template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
virtual int PatternModel< ValueType, ValueHandler, MapType, PatternType >::computeskipgrams ( const PatternPointer pattern,
PatternModelOptions options,
const IndexReference singleref = NULL,
const IndexedData multiplerefs = NULL,
PatternModelInterface constrainbymodel = NULL,
const bool  exhaustive = false 
)
inlinevirtual

Low-level function to compute skipgrams for a given pattern. See trainskipgrams() instead.

template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
void PatternModel< ValueType, ValueHandler, MapType, PatternType >::computestats ( )
inline

Compute statistics on the model, will generally be called automatically by methods that use it, and the statistics are cached after computation.

template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
double PatternModel< ValueType, ValueHandler, MapType, PatternType >::coverage ( const Pattern key)
inline

Return coverage as a fraction of the total number of tokens in the model. For unindexed models this is a maximal projection rather than an exact number.

template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
unsigned int PatternModel< ValueType, ValueHandler, MapType, PatternType >::coveragecount ( const Pattern key)
inline

Returns the coverage count for the given pattern. For unindexed models, the coverage count is a mere maximum projection equal to the product of the occurrence count and the size.

template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
PatternSet<uint64_t> PatternModel< ValueType, ValueHandler, MapType, PatternType >::extractset ( int  minlength = 1,
int  maxlength = 1 
)
inline

Returns a PatternSet containing patterns of the specified length. Patterns are actively reconstructed from patterns in the model, if necessary. So this includes patterns that are not in the model explicitly (i.e., smaller patterns that have been pruned).

template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
virtual std::vector<PatternPointer> PatternModel< ValueType, ValueHandler, MapType, PatternType >::findskipgrams ( const PatternPointer pattern,
unsigned int  occurrencethreshold = 1,
int  maxskips = 3 
)
inlinevirtual

Returns a vector of all skipgrams that can be extracted from the given pattern

template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
double PatternModel< ValueType, ValueHandler, MapType, PatternType >::frequency ( const Pattern pattern)
inlinevirtual

Returns the frequency of a pattern within its own group (category and size). For instance, if you pass a bigram you will get the occurrence count as a fraction of the total occurrences of bigrams.

Implements PatternModelInterface.

template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
virtual ValueType* PatternModel< ValueType, ValueHandler, MapType, PatternType >::getdata ( const Pattern pattern,
bool  makeifnew = false 
)
inlinevirtual

Get the value stored for the specified pattern.

Parameters
makeifnew: Add the pattern with an empty value if it does not exist (default: false)

Reimplemented in IndexedPatternModel< MapType, PatternType >, and IndexedPatternModel< MapType, PatternPointer >.

template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
virtual ValueType* PatternModel< ValueType, ValueHandler, MapType, PatternType >::getdata ( const PatternPointer pattern,
bool  makeifnew = false 
)
inlinevirtual
template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
virtual t_relationmap PatternModel< ValueType, ValueHandler, MapType, PatternType >::getinstances ( const Pattern pattern,
int  = 0 
)
inlinevirtual
template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
PatternModelInterface* PatternModel< ValueType, ValueHandler, MapType, PatternType >::getinterface ( )
inline

Returns a more generic but limited PatternModelInterface instance (polymorphism)

template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
virtual t_relationmap PatternModel< ValueType, ValueHandler, MapType, PatternType >::getleftneighbours ( const Pattern pattern,
int  = 0,
int  = 0,
int  = 0,
int  = 0 
)
inlinevirtual
template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
virtual int PatternModel< ValueType, ValueHandler, MapType, PatternType >::getmodeltype ( ) const
inlinevirtual
template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
virtual int PatternModel< ValueType, ValueHandler, MapType, PatternType >::getmodelversion ( ) const
inlinevirtual
template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
virtual t_relationmap_double PatternModel< ValueType, ValueHandler, MapType, PatternType >::getnpmi ( const Pattern pattern,
double  threshold 
)
inlinevirtual
template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
std::vector<std::pair<Pattern, int> > PatternModel< ValueType, ValueHandler, MapType, PatternType >::getpatterns ( const Pattern pattern)
inline

Get all patterns contained in the given pattern that occur in the pattern model, as a vector of pairs of Pattern and occurrence count.

template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
std::vector<PatternPointer> PatternModel< ValueType, ValueHandler, MapType, PatternType >::getreverseindex ( const IndexReference  ref,
int  occurrencecount = 0,
int  category = 0,
unsigned int  size = 0 
)
inline

Given a position in the corpus, return a vector of all the patterns that cover this position.

Parameters
ref: The position in the corpus
occurrencecount: If set above zero, filters to only include patterns occurring above this threshold
category: Set to any value of PatternCategory (NGRAM,SKIPGRAM,FLEXGRAM) to include only this category. Set to 0 for unfiltered (default)
size: Set to any value above zero to only include patterns of the specified length.
template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
std::vector<std::pair<IndexReference,PatternPointer> > PatternModel< ValueType, ValueHandler, MapType, PatternType >::getreverseindex_bysentence ( int  sentence)
inline

Returns pairs of positions and patterns, consisting of all patterns found in the specified sentence (or whatever unit delimits your corpus)

Parameters
sentence: The sentence index (starts at 1)
template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
std::vector<std::pair<IndexReference,PatternPointer> > PatternModel< ValueType, ValueHandler, MapType, PatternType >::getreverseindex_left ( const IndexReference  ref)
inline

Given a position in the corpus, return a vector of all the positions and patterns (as pairs) that occur to the left of this position

Parameters
ref: The position in the corpus
template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
std::vector<std::pair<IndexReference,PatternPointer> > PatternModel< ValueType, ValueHandler, MapType, PatternType >::getreverseindex_right ( const IndexReference  ref)
inline

Given a position in the corpus, return a vector of all the positions and patterns (as pairs) that occur to the right of this position

Parameters
ref: The position in the corpus
template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
virtual t_relationmap PatternModel< ValueType, ValueHandler, MapType, PatternType >::getrightneighbours ( const Pattern pattern,
int  = 0,
int  = 0,
int  = 0,
int  = 0 
)
inlinevirtual
template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
virtual t_relationmap PatternModel< ValueType, ValueHandler, MapType, PatternType >::getskipcontent ( const PatternPointer pattern)
inlinevirtual
template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
virtual t_relationmap PatternModel< ValueType, ValueHandler, MapType, PatternType >::getsubchildren ( const Pattern pattern,
int  = 0,
int  = 0,
int  = 0 
)
inlinevirtual
template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
virtual t_relationmap PatternModel< ValueType, ValueHandler, MapType, PatternType >::getsubparents ( const Pattern pattern,
int  = 0,
int  = 0,
int  = 0 
)
inlinevirtual
template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
virtual t_relationmap PatternModel< ValueType, ValueHandler, MapType, PatternType >::gettemplates ( const Pattern pattern,
int  = 0 
)
inlinevirtual
template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
virtual bool PatternModel< ValueType, ValueHandler, MapType, PatternType >::has ( const Pattern pattern) const
inlinevirtual

Checks whether the given pattern occurs in the model

Implements PatternStoreInterface.

template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
virtual bool PatternModel< ValueType, ValueHandler, MapType, PatternType >::has ( const PatternPointer ) const
inlinevirtual

Does the pattern occur in the pattern store?

Implements PatternStoreInterface.

template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
void PatternModel< ValueType, ValueHandler, MapType, PatternType >::histogram ( std::map< unsigned int, unsigned int > &  hist,
unsigned int  threshold = 0,
unsigned int  cap = 0,
int  category = 0,
unsigned int  size = 0 
)
inline

Generate a histogram for the occurrence count of patterns

Parameters
hist: This will contain the to-be-computed histogram
threshold: Include only patterns at or above this occurrence threshold
cap: Include only this many of the top frequencies (0=unconstrained)
category: Set to any value of PatternCategory (NGRAM,SKIPGRAM,FLEXGRAM) to filter or to 0 to cover all
size: Set to any value above zero to include only patterns of the specified length. (0 for all sizes)
template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
void PatternModel< ValueType, ValueHandler, MapType, PatternType >::histogram ( std::ostream *  OUT,
unsigned int  threshold = 0,
unsigned int  cap = 0,
int  category = 0,
unsigned int  size = 0 
)
inline

Generate a histogram for the occurrence count of patterns and output it to the output stream.

Parameters
OUT: The output stream
threshold: Include only patterns at or above this occurrence threshold
cap: Include only this many of the top frequencies (0=unconstrained)
category: Set to any value of PatternCategory (NGRAM,SKIPGRAM,FLEXGRAM) to filter or to 0 to cover all
size: Set to any value above zero to include only patterns of the specified length. (0 for all sizes)
template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
void PatternModel< ValueType, ValueHandler, MapType, PatternType >::info ( std::ostream *  OUT)
inline

Output information about the model to the output stream, includes some statistics and technical details such as space requirements.

template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
virtual void PatternModel< ValueType, ValueHandler, MapType, PatternType >::load ( std::string &  filename,
const PatternModelOptions options,
PatternModelInterface constrainmodel = NULL 
)
inlinevirtual

Read a pattern model from file

Parameters
filename: The input filename
options: Options for reading, these act as a filter for the data, allowing you to raise thresholds etc
constrainmodel: Pointer to another pattern model which should be used to constrain the loading of this one, only patterns also occurring in the other model will be included. Defaults to NULL (no constraining)
template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
virtual void PatternModel< ValueType, ValueHandler, MapType, PatternType >::load ( std::istream *  f,
const PatternModelOptions options,
PatternModelInterface constrainmodel = NULL 
)
inlinevirtual

Read a pattern model from an input stream

Parameters
f: The input stream
options: Options for reading, these act as a filter for the data, allowing you to raise thresholds etc
constrainmodel: Pointer to another pattern model which should be used to constrain the loading of this one, only patterns also occurring in the other model will be included. Defaults to NULL (no constraining)
template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
virtual int PatternModel< ValueType, ValueHandler, MapType, PatternType >::maxlength ( ) const
inlinevirtual

Returns the maximum length of patterns in this model

Implements PatternModelInterface.

template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
virtual int PatternModel< ValueType, ValueHandler, MapType, PatternType >::minlength ( ) const
inlinevirtual

Returns the minimum length of patterns in this model

Implements PatternModelInterface.

template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
virtual unsigned int PatternModel< ValueType, ValueHandler, MapType, PatternType >::occurrencecount ( const Pattern pattern)
inlinevirtual

Returns the occurrence count of the specified pattern; will return 0 if it does not exist in the model

Implements PatternModelInterface.

template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
virtual unsigned int PatternModel< ValueType, ValueHandler, MapType, PatternType >::occurrencecount ( const PatternPointer pattern)
inlinevirtual
template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
void PatternModel< ValueType, ValueHandler, MapType, PatternType >::output ( std::ostream *  )
template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
virtual void PatternModel< ValueType, ValueHandler, MapType, PatternType >::outputcooc ( std::ostream *  OUT,
ClassDecoder classdecoder,
double  threshold 
)
inlinevirtual
template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
virtual void PatternModel< ValueType, ValueHandler, MapType, PatternType >::outputcooc_npmi ( std::ostream *  OUT,
ClassDecoder classdecoder,
double  threshold 
)
inlinevirtual
template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
virtual void PatternModel< ValueType, ValueHandler, MapType, PatternType >::outputrelations ( const Pattern pattern,
ClassDecoder classdecoder,
std::ostream *  OUT 
)
inlinevirtual
template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
virtual void PatternModel< ValueType, ValueHandler, MapType, PatternType >::postread ( const PatternModelOptions  options)
inlineprotectedvirtual
template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
virtual void PatternModel< ValueType, ValueHandler, MapType, PatternType >::posttrain ( const PatternModelOptions  options)
inlineprotectedvirtual
template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
virtual void PatternModel< ValueType, ValueHandler, MapType, PatternType >::print ( std::ostream *  out,
ClassDecoder decoder 
)
inlinevirtual

Print the contents of the pattern model, i.e. all patterns and associated counts, to the output stream.

Parameters
out: The output stream
decoder: The class decoder to use

Reimplemented in IndexedPatternModel< MapType, PatternType >, and IndexedPatternModel< MapType, PatternPointer >.

template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
virtual void PatternModel< ValueType, ValueHandler, MapType, PatternType >::print ( std::ostream *  out,
ClassDecoder decoder,
const PatternType pattern,
bool  endline = true 
)
inlinevirtual

Print for one pattern only.

Parameters
out: The output stream
decoder: The class decoder to use

Reimplemented in IndexedPatternModel< MapType, PatternPointer >.

template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
void PatternModel< ValueType, ValueHandler, MapType, PatternType >::printmodel ( std::ostream *  out,
ClassDecoder decoder 
)
inline

Just an alias for print()

template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
void PatternModel< ValueType, ValueHandler, MapType, PatternType >::printpattern ( std::ostream *  out,
ClassDecoder decoder,
const Pattern pattern,
bool  endline = true 
)
inline

Alias for per-pattern print()

template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
virtual void PatternModel< ValueType, ValueHandler, MapType, PatternType >::printreverseindex ( std::ostream *  out,
ClassDecoder decoder 
)
inlinevirtual

Print the full reverse index, a mapping of indices and all patterns that occur at those positions.

Parameters
out: The output stream
decoder: The class decoder to use
template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
unsigned int PatternModel< ValueType, ValueHandler, MapType, PatternType >::prune ( int  threshold,
int  _n = 0 
)
inline

Prune all patterns under the specified occurrence threshold (or -1 for all). Pruning can be limited to patterns of a particular size only.

Parameters
threshold: The occurrence threshold (set to -1 to prune everything)
_n: The size constraint; limit to patterns of this size only (set to 0 for no constraint, default)
Returns
the number of distinct patterns pruned
template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
template<class ValueType2 , class ValueHandler2 , class MapType2 >
unsigned int PatternModel< ValueType, ValueHandler, MapType, PatternType >::prunebymodel ( PatternModel< ValueType2, ValueHandler2, MapType2 > &  secondmodel)
inline

Prune all patterns that are not in the second model

Returns
the number of distinct patterns pruned
template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
unsigned int PatternModel< ValueType, ValueHandler, MapType, PatternType >::prunenotinset ( const std::unordered_set< Pattern > &  s,
int  _n 
)
inline

Prune all patterns that are not in the specified set.

Parameters
s: The set containing the patterns not to prune
_n: The size constraint; limit to patterns of this size only (set to 0 for no constraint, default)
Returns
the number of distinct patterns pruned
template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
virtual unsigned int PatternModel< ValueType, ValueHandler, MapType, PatternType >::pruneskipgrams ( unsigned int  threshold,
int  minskiptypes = 2,
int  _n = 0 
)
inlinevirtual

Prune all skipgrams under the specified occurrence threshold (or -1 for all). Pruning can be limited to patterns of a particular size only.

Parameters
threshold: The occurrence threshold (set to -1 to prune everything)
_n: The size constraint; limit to patterns of this size only (set to 0 for no constraint, default)
Returns
the number of distinct patterns pruned
template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
void PatternModel< ValueType, ValueHandler, MapType, PatternType >::report ( std::ostream *  OUT)
inline

Output an elaborate statistical report to the output stream. Computes on first call when necessary.

template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
virtual void PatternModel< ValueType, ValueHandler, MapType, PatternType >::resetstats ( )
inlinevirtual
template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
virtual size_t PatternModel< ValueType, ValueHandler, MapType, PatternType >::size ( ) const
inlinevirtual

Returns the number of distinct patterns in the model

Implements PatternStoreInterface.

template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
void PatternModel< ValueType, ValueHandler, MapType, PatternType >::test ( MapType &  target,
std::istream *  in 
)
template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
virtual unsigned int PatternModel< ValueType, ValueHandler, MapType, PatternType >::tokens ( ) const
inlinevirtual

Return the total amount of word/unigram tokens in the model

Implements PatternModelInterface.

template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
unsigned int PatternModel< ValueType, ValueHandler, MapType, PatternType >::topthreshold ( int  amount,
int  category = 0,
int  size = 0 
)
inline
template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
unsigned int PatternModel< ValueType, ValueHandler, MapType, PatternType >::totaloccurrencesingroup ( int  category,
int  n 
)
inline

Obtains statistics of the model: returns the total amount of occurrences within the specified group; the group consists of a category and a size.

Parameters
category: Set to any value of PatternCategory (NGRAM,SKIPGRAM,FLEXGRAM) or to 0 to cover all
n: Set to any value above zero to cover only patterns of the specified length. (0 for all sizes)
template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
unsigned int PatternModel< ValueType, ValueHandler, MapType, PatternType >::totalpatternsingroup ( int  category,
int  n 
)
inline

Obtains statistics of the model: returns the total amount of distinct patterns within the specified group; the group consists of a category and a size.

Parameters
category: Set to any value of PatternCategory (NGRAM,SKIPGRAM,FLEXGRAM) or to 0 to cover all
n: Set to any value above zero to cover only patterns of the specified length. (0 for all sizes)
template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
unsigned int PatternModel< ValueType, ValueHandler, MapType, PatternType >::totaltokensingroup ( int  category,
int  n 
)
inline

Obtains statistics of the model: returns the total amount of covered tokens within the specified group; the group consists of a category and a size.

Parameters
category: Set to any value of PatternCategory (NGRAM,SKIPGRAM,FLEXGRAM) or to 0 to cover all
n: Set to any value above zero to cover only patterns of the specified length. (0 for all sizes)
template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
unsigned int PatternModel< ValueType, ValueHandler, MapType, PatternType >::totalwordtypesingroup ( int  category,
int  n 
)
inline

Obtains statistics of the model: returns the total amount of word/unigram types within the specified group; the group consists of a category and a size.

Parameters
category: Set to any value of PatternCategory (NGRAM,SKIPGRAM,FLEXGRAM) or to 0 to cover all
n: Set to any value above zero to cover only patterns of the specified length. (0 for all sizes)
template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
virtual void PatternModel< ValueType, ValueHandler, MapType, PatternType >::train ( std::istream *  in,
PatternModelOptions  options,
PatternModelInterface constrainbymodel = NULL,
bool  continued = false,
uint32_t  firstsentence = 1,
bool  ignoreerrors = false 
)
inlinevirtual

Train a pattern model on corpus data (given an input stream)

Parameters
in: The input stream of the corpus data (*.colibri.dat), may be NULL if a reverse index is loaded.
options: Options for training
constrainbymodel: Pointer to another pattern model which should be used to constrain the training of this one; only patterns also occurring in the other model will be included. Defaults to NULL (no constraining)
continued: Continued training on the same corpus data
firstsentence: First sentence index, useful for augmenting a model with another corpus (keep continued set to false in this case), defaults to 1
ignoreerrors: Try to ignore errors (use for debug only)

Reimplemented in IndexedPatternModel< MapType, PatternType >, and IndexedPatternModel< MapType, PatternPointer >.

template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
virtual void PatternModel< ValueType, ValueHandler, MapType, PatternType >::train ( const std::string &  filename,
PatternModelOptions  options,
PatternModelInterface constrainbymodel = NULL,
bool  continued = false,
uint32_t  firstsentence = 1,
bool  ignoreerrors = false 
)
inlinevirtual

Train a pattern model on corpus data

Parameters
filename: The filename of the corpus data (*.colibri.dat)
options: Options for training
constrainbymodel: Pointer to another pattern model which should be used to constrain the training of this one; only patterns also occurring in the other model will be included. Defaults to NULL (no constraining)

Reimplemented in IndexedPatternModel< MapType, PatternType >, and IndexedPatternModel< MapType, PatternPointer >.

template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
virtual void PatternModel< ValueType, ValueHandler, MapType, PatternType >::trainskipgrams ( const PatternModelOptions  options,
PatternModelInterface constrainbymodel = NULL 
)
inlinevirtual

Train skipgrams, for indexed models only

Reimplemented in IndexedPatternModel< MapType, PatternType >, and IndexedPatternModel< MapType, PatternPointer >.

template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
unsigned char PatternModel< ValueType, ValueHandler, MapType, PatternType >::type ( ) const
inline
template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
virtual unsigned int PatternModel< ValueType, ValueHandler, MapType, PatternType >::types ( )
inlinevirtual

Return the total amount of word/unigram types in the model

Implements PatternModelInterface.

template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
unsigned char PatternModel< ValueType, ValueHandler, MapType, PatternType >::version ( ) const
inline
template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
void PatternModel< ValueType, ValueHandler, MapType, PatternType >::write ( std::ostream *  out)
inline

Write the pattern model to output stream

template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
void PatternModel< ValueType, ValueHandler, MapType, PatternType >::write ( const std::string  filename)
inline

Save the entire pattern model to file

Member Data Documentation

template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
std::set<int> PatternModel< ValueType, ValueHandler, MapType, PatternType >::cache_categories
protected
template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
std::map<int,std::map<int,unsigned int> > PatternModel< ValueType, ValueHandler, MapType, PatternType >::cache_grouptotal
protected

total occurrences (used for frequency computation, within a group)

template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
std::map<int,std::map<int,unsigned int> > PatternModel< ValueType, ValueHandler, MapType, PatternType >::cache_grouptotalpatterns
protected

total distinct patterns per group

template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
std::map<int,std::map<int,unsigned int> > PatternModel< ValueType, ValueHandler, MapType, PatternType >::cache_grouptotaltokens
protected

total covered tokens per group

template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
std::map<int,std::map<int,unsigned int> > PatternModel< ValueType, ValueHandler, MapType, PatternType >::cache_grouptotalwordtypes
protected

total covered word types per group

template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
std::set<int> PatternModel< ValueType, ValueHandler, MapType, PatternType >::cache_n
protected
template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
std::map<int, std::vector< uint32_t > > PatternModel< ValueType, ValueHandler, MapType, PatternType >::gapmasks
protected

pre-computed masks representing possible gap configurations for various pattern lengths

template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
bool PatternModel< ValueType, ValueHandler, MapType, PatternType >::hasskipgrams

Does this model have skipgrams?

template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
int PatternModel< ValueType, ValueHandler, MapType, PatternType >::maxn
protected
template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
int PatternModel< ValueType, ValueHandler, MapType, PatternType >::minn
protected
template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
unsigned char PatternModel< ValueType, ValueHandler, MapType, PatternType >::model_type
protected
template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
unsigned char PatternModel< ValueType, ValueHandler, MapType, PatternType >::model_version
protected
template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
IndexedCorpus* PatternModel< ValueType, ValueHandler, MapType, PatternType >::reverseindex

Pointer to the reverse index and corpus data for this model (or NULL)

template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
bool PatternModel< ValueType, ValueHandler, MapType, PatternType >::reverseindex_internal
template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
uint64_t PatternModel< ValueType, ValueHandler, MapType, PatternType >::totaltokens
protected

Total number of tokens in the original corpus, so INCLUDES TOKENS NOT COVERED BY THE MODEL!

template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternMap<ValueType, BaseValueHandler<ValueType>>, class PatternType = Pattern>
uint64_t PatternModel< ValueType, ValueHandler, MapType, PatternType >::totaltypes
protected

Total number of unigram/word types in the original corpus, so INCLUDES TYPES NOT COVERED BY THE MODEL!


The documentation for this class was generated from the following file: