Colibri Core
Public Member Functions | Protected Member Functions | List of all members
IndexedPatternModel< MapType, PatternType > Class Template Reference

An indexed model mapping patterns to values, high-level interface. This is a specialised subclass of PatternModel. More...

#include <patternmodel.h>

Inheritance diagram for IndexedPatternModel< MapType, PatternType >:
PatternModel< IndexedData, IndexedDataHandler, MapType, PatternType > PatternModelInterface PatternStoreInterface PatternStoreInterface

Public Member Functions

 IndexedPatternModel (IndexedCorpus *corpus=NULL)
 
 IndexedPatternModel (std::istream *f, const PatternModelOptions options, PatternModelInterface *constrainmodel=NULL, IndexedCorpus *corpus=NULL)
 
 IndexedPatternModel (const std::string filename, const PatternModelOptions options, PatternModelInterface *constrainmodel=NULL, IndexedCorpus *corpus=NULL)
 
virtual ~IndexedPatternModel ()
 
int getmodeltype () const
 
int getmodelversion () const
 
virtual void add (const Pattern &pattern, IndexedData *value, const IndexReference &ref)
 
virtual void add (const PatternPointer &patternpointer, IndexedData *value, const IndexReference &ref)
 
IndexedData * getdata (const Pattern &pattern, bool makeifnew=false)
 
IndexedData * getdata (const PatternPointer &pattern, bool makeifnew=false)
 
virtual void train (std::istream *in, PatternModelOptions options, PatternModelInterface *constrainbymodel=NULL, bool continued=false, uint32_t firstsentence=1, bool ignoreerrors=false)
 
virtual void train (const std::string &filename, PatternModelOptions options, PatternModelInterface *constrainbymodel=NULL, bool continued=false, uint32_t firstsentence=1, bool ignoreerrors=false)
 
void info (std::ostream *OUT)
 
void print (std::ostream *out, ClassDecoder &decoder)
 
void print (std::ostream *out, ClassDecoder &decoder, const PatternPointer &pattern, bool endline=true)
 
virtual void trainskipgrams (PatternModelOptions options, PatternModelInterface *constrainbymodel=NULL)
 
Pattern getpatternfromtoken (IndexReference ref)
 
t_relationmap getskipcontent (const PatternPointer &pattern)
 
void prunerelations (t_relationmap &relations, unsigned int occurrencethreshold)
 
t_relationmap gettemplates (const Pattern &pattern, unsigned int occurrencethreshold=0)
 
t_relationmap getinstances (const Pattern &pattern, unsigned int occurrencethreshold=0)
 
t_relationmap getsubchildren (const PatternPointer &pattern, unsigned int occurrencethreshold=0, int category=0, unsigned int size=0)
 
t_relationmap getsubparents (const PatternPointer &pattern, unsigned int occurrencethreshold=0, int category=0, unsigned int size=0)
 
t_relationmap getleftneighbours (const PatternPointer &pattern, unsigned int occurrencethreshold=0, int category=0, unsigned int size=0, unsigned int cutoff=0)
 
t_relationmap getrightneighbours (const PatternPointer &pattern, unsigned int occurrencethreshold=0, int category=0, unsigned int size=0, unsigned int cutoff=0)
 
int pruneskipgrams (int threshold, int minskiptypes, int _n=0)
 
virtual void computecoveragestats (int category=0, int n=0)
 
t_relationmap getrightcooc (const PatternPointer &pattern, unsigned int occurrencethreshold=0, int category=0, unsigned int size=0, IndexedData *matches=NULL)
 
t_relationmap getleftcooc (const PatternPointer &pattern, unsigned int occurrencethreshold=0, int category=0, unsigned int size=0)
 
t_relationmap getcooc (const PatternPointer &pattern, unsigned int occurrencethreshold=0, int category=0, unsigned int size=0, bool ordersignificant=false)
 
double npmi (const PatternPointer &key1, const PatternPointer &key2, int jointcount)
 
void outputrelations (const PatternPointer &pattern, t_relationmap &relations, ClassDecoder &classdecoder, std::ostream *OUT, const std::string label="RELATED-TO")
 
void outputrelations (const PatternPointer &pattern, ClassDecoder &classdecoder, std::ostream *OUT, bool outputheader=true)
 
void computenpmi (std::map< PatternPointer, t_relationmap_double > &coocmap, double threshold, bool right=true, bool left=true)
 
void computecooc (std::map< PatternPointer, t_relationmap > &coocmap, int threshold, bool right=true, bool left=true)
 
int computeflexgrams_fromskipgrams ()
 
int computeflexgrams_fromcooc (double threshold)
 
void outputcooc_npmi (std::ostream *OUT, ClassDecoder &classdecoder, double threshold)
 
void outputcooc (std::ostream *OUT, ClassDecoder &classdecoder, double threshold)
 
int flexgramsize (const Pattern &pattern, IndexReference begin)
 
- Public Member Functions inherited from PatternModel< IndexedData, IndexedDataHandler, MapType, PatternType >
 PatternModel (IndexedCorpus *corpus=NULL)
 
 PatternModel (std::istream *f, PatternModelOptions options, PatternModelInterface *constrainmodel=NULL, IndexedCorpus *corpus=NULL)
 
 PatternModel (const std::string &filename, const PatternModelOptions &options, PatternModelInterface *constrainmodel=NULL, IndexedCorpus *corpus=NULL)
 
 ~PatternModel ()
 
virtual size_t size () const
 
virtual bool has (const Pattern &pattern) const
 
virtual bool has (const PatternPointer &pattern) const
 
virtual void load (std::string &filename, const PatternModelOptions &options, PatternModelInterface *constrainmodel=NULL)
 
virtual void load (std::istream *f, const PatternModelOptions &options, PatternModelInterface *constrainmodel=NULL)
 
PatternModelInterface * getinterface ()
 
virtual int computeskipgrams (const PatternPointer &pattern, int mintokens=2, const IndexReference *singleref=NULL, const IndexedData *multiplerefs=NULL, PatternModelInterface *constrainbymodel=NULL, std::vector< PatternPointer > *targetcontainer=NULL, const bool exhaustive=false, const int maxskips=3, const bool DEBUG=false)
 
virtual int computeskipgrams (const PatternPointer &pattern, PatternModelOptions &options, const IndexReference *singleref=NULL, const IndexedData *multiplerefs=NULL, PatternModelInterface *constrainbymodel=NULL, const bool exhaustive=false)
 
virtual std::vector< PatternPointer > findskipgrams (const PatternPointer &pattern, unsigned int occurrencethreshold=1, int maxskips=3)
 
void test (MapType &target, std::istream *in)
 
void write (std::ostream *out)
 
void write (const std::string filename)
 
virtual int maxlength () const
 
virtual int minlength () const
 
virtual unsigned int occurrencecount (const Pattern &pattern)
 
virtual unsigned int occurrencecount (const PatternPointer &pattern)
 
virtual unsigned int types ()
 
virtual unsigned int tokens () const
 
unsigned char type () const
 
unsigned char version () const
 
void output (std::ostream *)
 
unsigned int coveragecount (const Pattern &key)
 
double coverage (const Pattern &key)
 
std::vector< PatternPointer > getreverseindex (const IndexReference ref, int occurrencecount=0, int category=0, unsigned int size=0)
 
std::vector< std::pair< IndexReference, PatternPointer > > getreverseindex_bysentence (int sentence)
 
std::vector< std::pair< IndexReference, PatternPointer > > getreverseindex_right (const IndexReference ref)
 
std::vector< std::pair< IndexReference, PatternPointer > > getreverseindex_left (const IndexReference ref)
 
void computestats ()
 
virtual void resetstats ()
 
unsigned int totaloccurrencesingroup (int category, int n)
 
unsigned int totalpatternsingroup (int category, int n)
 
unsigned int totalwordtypesingroup (int category, int n)
 
unsigned int totaltokensingroup (int category, int n)
 
double frequency (const Pattern &pattern)
 
virtual void add (const PatternPointer &patternpointer, const IndexReference &ref)
 
unsigned int prune (int threshold, int _n=0)
 
virtual unsigned int pruneskipgrams (unsigned int threshold, int minskiptypes=2, int _n=0)
 
unsigned int prunenotinset (const std::unordered_set< Pattern > &s, int _n)
 
unsigned int prunebymodel (PatternModel< ValueType2, ValueHandler2, MapType2 > &secondmodel)
 
std::vector< std::pair< Pattern, int > > getpatterns (const Pattern &pattern)
 
virtual void print (std::ostream *out, ClassDecoder &decoder, const PatternType &pattern, bool endline=true)
 
virtual void printreverseindex (std::ostream *out, ClassDecoder &decoder)
 
void printmodel (std::ostream *out, ClassDecoder &decoder)
 
void printpattern (std::ostream *out, ClassDecoder &decoder, const Pattern &pattern, bool endline=true)
 
void histogram (std::map< unsigned int, unsigned int > &hist, unsigned int threshold=0, unsigned int cap=0, int category=0, unsigned int size=0)
 
void histogram (std::ostream *OUT, unsigned int threshold=0, unsigned int cap=0, int category=0, unsigned int size=0)
 
unsigned int topthreshold (int amount, int category=0, int size=0)
 
void info (std::ostream *OUT)
 
void report (std::ostream *OUT)
 
PatternSet< uint64_t > extractset (int minlength=1, int maxlength=1)
 
virtual void outputrelations (const Pattern &pattern, ClassDecoder &classdecoder, std::ostream *OUT)
 
virtual t_relationmap getsubchildren (const Pattern &pattern, int=0, int=0, int=0)
 
virtual t_relationmap getsubparents (const Pattern &pattern, int=0, int=0, int=0)
 
virtual t_relationmap gettemplates (const Pattern &pattern, int=0)
 
virtual t_relationmap getinstances (const Pattern &pattern, int=0)
 
virtual t_relationmap getleftneighbours (const Pattern &pattern, int=0, int=0, int=0, int=0)
 
virtual t_relationmap getrightneighbours (const Pattern &pattern, int=0, int=0, int=0, int=0)
 
virtual t_relationmap_double getnpmi (const Pattern &pattern, double threshold)
 
virtual int computeflexgrams_fromcooc ()
 
- Public Member Functions inherited from PatternModelInterface
virtual PatternStoreInterface * getstoreinterface ()
 
virtual PatternStoreInterface * getstoreinterface ()
 

Protected Member Functions

virtual void postread (const PatternModelOptions options)
 
virtual void posttrain (const PatternModelOptions options)
 

Additional Inherited Members

- Public Types inherited from PatternModel< IndexedData, IndexedDataHandler, MapType, PatternType >
typedef MapType::iterator iterator
 
typedef MapType::const_iterator const_iterator
 
- Public Attributes inherited from PatternModel< IndexedData, IndexedDataHandler, MapType, PatternType >
IndexedCorpus * reverseindex
 Pointer to the reverse index and corpus data for this model (or NULL) More...
 
bool reverseindex_internal
 
bool hasskipgrams
 Does this model have skipgrams? More...
 
- Protected Attributes inherited from PatternModel< IndexedData, IndexedDataHandler, MapType, PatternType >
unsigned char model_type
 
unsigned char model_version
 
uint64_t totaltokens
 Total number of tokens in the original corpus, so INCLUDES TOKENS NOT COVERED BY THE MODEL! More...
 
uint64_t totaltypes
 Total number of unigram/word types in the original corpus, SO INCLUDING NOT COVERED BY THE MODEL! More...
 
int maxn
 
int minn
 
std::set< int > cache_categories
 
std::set< int > cache_n
 
std::map< int, std::map< int, unsigned int > > cache_grouptotal
 total occurrences (used for frequency computation, within a group) More...
 
std::map< int, std::map< int, unsigned int > > cache_grouptotalpatterns
 total distinct patterns per group More...
 
std::map< int, std::map< int, unsigned int > > cache_grouptotalwordtypes
 total covered word types per group More...
 
std::map< int, std::map< int, unsigned int > > cache_grouptotaltokens
 total covered tokens per group More...
 
std::map< int, std::vector< uint32_t > > gapmasks
 pre-computed masks representing possible gap configurations for various pattern lengths More...
 

Detailed Description

template<class MapType = PatternMap<IndexedData,IndexedDataHandler>, class PatternType = Pattern>
class IndexedPatternModel< MapType, PatternType >

An indexed model mapping patterns to values, high-level interface. This is a specialised subclass of PatternModel.

Template Parameters
MapType: The type of container to use

Constructor & Destructor Documentation

template<class MapType = PatternMap<IndexedData,IndexedDataHandler>, class PatternType = Pattern>
IndexedPatternModel< MapType, PatternType >::IndexedPatternModel ( IndexedCorpus * corpus = NULL)
inline

Begin a new pattern model, optionally pre-setting a reverseindex.

template<class MapType = PatternMap<IndexedData,IndexedDataHandler>, class PatternType = Pattern>
IndexedPatternModel< MapType, PatternType >::IndexedPatternModel ( std::istream *  f,
const PatternModelOptions  options,
PatternModelInterface * constrainmodel = NULL,
IndexedCorpus * corpus = NULL
)
inline

Read a pattern model from an input stream

Parameters
f: The input stream
options: Options for reading, these act as filter for the data, allowing you to raise thresholds etc
constrainmodel: Pointer to another pattern model which should be used to constrain the loading of this one, only patterns also occurring in the other model will be included. Defaults to NULL (no constraining)
corpus: Pointer to the loaded corpus, used as a reverse index.
template<class MapType = PatternMap<IndexedData,IndexedDataHandler>, class PatternType = Pattern>
IndexedPatternModel< MapType, PatternType >::IndexedPatternModel ( const std::string  filename,
const PatternModelOptions  options,
PatternModelInterface * constrainmodel = NULL,
IndexedCorpus * corpus = NULL
)
inline

Read a pattern model from file

Parameters
filename: The filename
options: Options for reading, these act as filter for the data, allowing you to raise thresholds etc
constrainmodel: Pointer to another pattern model which should be used to constrain the loading of this one, only patterns also occurring in the other model will be included. Defaults to NULL (no constraining)
corpus: Pointer to the loaded corpus, used as a reverse index.
template<class MapType = PatternMap<IndexedData,IndexedDataHandler>, class PatternType = Pattern>
virtual IndexedPatternModel< MapType, PatternType >::~IndexedPatternModel ( )
inline virtual

Member Function Documentation

template<class MapType = PatternMap<IndexedData,IndexedDataHandler>, class PatternType = Pattern>
virtual void IndexedPatternModel< MapType, PatternType >::add ( const Pattern & pattern,
IndexedData * value,
const IndexReference & ref
)
inline virtual

Add a pattern, with a given position, and a value to the model. This is called during training every time an instance of a pattern is found in the data.

Parameters
pattern: The pattern to add
value: A pointer to the value for this pattern, set to NULL and it will be automatically determined
ref: The position in the corpus where the pattern occurs

Reimplemented from PatternModel< IndexedData, IndexedDataHandler, MapType, PatternType >.

template<class MapType = PatternMap<IndexedData,IndexedDataHandler>, class PatternType = Pattern>
virtual void IndexedPatternModel< MapType, PatternType >::add ( const PatternPointer & patternpointer,
IndexedData * value,
const IndexReference & ref
)
inline virtual
template<class MapType = PatternMap<IndexedData,IndexedDataHandler>, class PatternType = Pattern>
void IndexedPatternModel< MapType, PatternType >::computecooc ( std::map< PatternPointer, t_relationmap > &  coocmap,
int  threshold,
bool  right = true,
bool  left = true 
)
inline

Compute co-occurrence as absolute joint occurrence count, for all patterns

Parameters
coocmap: The map that will store the results
threshold: Only include pairs whose joint occurrence count passes this threshold
right: Compute co-occurrence to the right (default: true)
left: Compute co-occurrence to the left (default: true)
template<class MapType = PatternMap<IndexedData,IndexedDataHandler>, class PatternType = Pattern>
virtual void IndexedPatternModel< MapType, PatternType >::computecoveragestats ( int  category = 0,
int  n = 0 
)
inline virtual

Compute coverage statistics on the model, will generally be called automatically by methods that use it, and the statistics are cached after computation.

Reimplemented from PatternModel< IndexedData, IndexedDataHandler, MapType, PatternType >.

template<class MapType = PatternMap<IndexedData,IndexedDataHandler>, class PatternType = Pattern>
int IndexedPatternModel< MapType, PatternType >::computeflexgrams_fromcooc ( double  threshold)
inline

Compute flexgrams using co-occurrence

Parameters
threshold: Normalised Pointwise Mutual Information threshold
Returns
The number of flexgrams found
template<class MapType = PatternMap<IndexedData,IndexedDataHandler>, class PatternType = Pattern>
int IndexedPatternModel< MapType, PatternType >::computeflexgrams_fromskipgrams ( )
inline virtual

Compute flexgrams by abstracting from existing skipgrams in the model

Returns
The number of flexgrams found

Reimplemented from PatternModel< IndexedData, IndexedDataHandler, MapType, PatternType >.

template<class MapType = PatternMap<IndexedData,IndexedDataHandler>, class PatternType = Pattern>
void IndexedPatternModel< MapType, PatternType >::computenpmi ( std::map< PatternPointer, t_relationmap_double > &  coocmap,
double  threshold,
bool  right = true,
bool  left = true 
)
inline
template<class MapType = PatternMap<IndexedData,IndexedDataHandler>, class PatternType = Pattern>
int IndexedPatternModel< MapType, PatternType >::flexgramsize ( const Pattern & pattern,
IndexReference  begin 
)
inline

Attempt to find the flexgram size for the given begin position. Returns 0 if the flexgram was not found at all; if there are multiple matches, the shortest is returned.

template<class MapType = PatternMap<IndexedData,IndexedDataHandler>, class PatternType = Pattern>
t_relationmap IndexedPatternModel< MapType, PatternType >::getcooc ( const PatternPointer & pattern,
unsigned int  occurrencethreshold = 0,
int  category = 0,
unsigned int  size = 0,
bool  ordersignificant = false 
)
inline

Returns all patterns in the model that co-occur with the given pattern in the same sentence

Parameters
occurrencethreshold: If set above zero, filters to only include patterns occurring above this threshold
category: Set to any value of PatternCategory (NGRAM,SKIPGRAM,FLEXGRAM) to include only this category. Set to 0 for unfiltered (default)
size: Set to any value above zero to only include patterns of the specified length.
ordersignificant: If set to true, each co-occurring pair will occur only once in the result; if false (default) it will appear twice, once in A,B form and once in B,A form.
Returns
a relation map
template<class MapType = PatternMap<IndexedData,IndexedDataHandler>, class PatternType = Pattern>
IndexedData* IndexedPatternModel< MapType, PatternType >::getdata ( const Pattern & pattern,
bool  makeifnew = false
)
inline virtual

Get the indices stored for the specified pattern.

Parameters
makeifnew: Add the pattern with empty value if it does not exist (default: false)

Reimplemented from PatternModel< IndexedData, IndexedDataHandler, MapType, PatternType >.

template<class MapType = PatternMap<IndexedData,IndexedDataHandler>, class PatternType = Pattern>
IndexedData* IndexedPatternModel< MapType, PatternType >::getdata ( const PatternPointer & pattern,
bool  makeifnew = false
)
inline virtual
template<class MapType = PatternMap<IndexedData,IndexedDataHandler>, class PatternType = Pattern>
t_relationmap IndexedPatternModel< MapType, PatternType >::getinstances ( const Pattern & pattern,
unsigned int  occurrencethreshold = 0 
)
inline

Returns all ngrams in the model that instantiate the given skipgram/flexgram. If all the gaps in a skipgram/flexgram are filled, we speak of such an instantiation. An occurrence threshold may be used to filter.

Returns
a relation map
template<class MapType = PatternMap<IndexedData,IndexedDataHandler>, class PatternType = Pattern>
t_relationmap IndexedPatternModel< MapType, PatternType >::getleftcooc ( const PatternPointer & pattern,
unsigned int  occurrencethreshold = 0,
int  category = 0,
unsigned int  size = 0 
)
inline

Returns all patterns in the model that co-occur with the given pattern in the same sentence and appear to the left of the given pattern

Parameters
occurrencethreshold: If set above zero, filters to only include patterns occurring above this threshold
category: Set to any value of PatternCategory (NGRAM,SKIPGRAM,FLEXGRAM) to include only this category. Set to 0 for unfiltered (default)
size: Set to any value above zero to only include patterns of the specified length.
Returns
a relation map
template<class MapType = PatternMap<IndexedData,IndexedDataHandler>, class PatternType = Pattern>
t_relationmap IndexedPatternModel< MapType, PatternType >::getleftneighbours ( const PatternPointer & pattern,
unsigned int  occurrencethreshold = 0,
int  category = 0,
unsigned int  size = 0,
unsigned int  cutoff = 0 
)
inline

Returns all patterns in the model that directly neighbour the given pattern at the left side

Parameters
occurrencethreshold: If set above zero, filters to only include patterns occurring above this threshold
category: Set to any value of PatternCategory (NGRAM,SKIPGRAM,FLEXGRAM) to include only this category. Set to 0 for unfiltered (default)
size: Set to any value above zero to only include patterns of the specified length.
Returns
a relation map
template<class MapType = PatternMap<IndexedData,IndexedDataHandler>, class PatternType = Pattern>
int IndexedPatternModel< MapType, PatternType >::getmodeltype ( ) const
inline virtual

Returns the type of model (a value from ModelType)

Reimplemented from PatternModel< IndexedData, IndexedDataHandler, MapType, PatternType >.

template<class MapType = PatternMap<IndexedData,IndexedDataHandler>, class PatternType = Pattern>
int IndexedPatternModel< MapType, PatternType >::getmodelversion ( ) const
inline virtual

Returns the version of the model implementation and binary serialisation format

Reimplemented from PatternModel< IndexedData, IndexedDataHandler, MapType, PatternType >.

template<class MapType = PatternMap<IndexedData,IndexedDataHandler>, class PatternType = Pattern>
Pattern IndexedPatternModel< MapType, PatternType >::getpatternfromtoken ( IndexReference  ref)
inline

Return the unigram Pattern that occurs on the specified position, using the reverse index.

template<class MapType = PatternMap<IndexedData,IndexedDataHandler>, class PatternType = Pattern>
t_relationmap IndexedPatternModel< MapType, PatternType >::getrightcooc ( const PatternPointer & pattern,
unsigned int  occurrencethreshold = 0,
int  category = 0,
unsigned int  size = 0,
IndexedData * matches = NULL
)
inline

Returns all patterns in the model that co-occur with the given pattern in the same sentence and appear to the right of the given pattern

Parameters
occurrencethreshold: If set above zero, filters to only include patterns occurring above this threshold
category: Set to any value of PatternCategory (NGRAM,SKIPGRAM,FLEXGRAM) to include only this category. Set to 0 for unfiltered (default)
size: Set to any value above zero to only include patterns of the specified length.
Returns
a relation map
template<class MapType = PatternMap<IndexedData,IndexedDataHandler>, class PatternType = Pattern>
t_relationmap IndexedPatternModel< MapType, PatternType >::getrightneighbours ( const PatternPointer & pattern,
unsigned int  occurrencethreshold = 0,
int  category = 0,
unsigned int  size = 0,
unsigned int  cutoff = 0 
)
inline

Returns all patterns in the model that directly neighbour the given pattern at the right side

Parameters
occurrencethreshold: If set above zero, filters to only include patterns occurring above this threshold
category: Set to any value of PatternCategory (NGRAM,SKIPGRAM,FLEXGRAM) to include only this category. Set to 0 for unfiltered (default)
size: Set to any value above zero to only include patterns of the specified length.
Returns
a relation map
template<class MapType = PatternMap<IndexedData,IndexedDataHandler>, class PatternType = Pattern>
t_relationmap IndexedPatternModel< MapType, PatternType >::getskipcontent ( const PatternPointer & pattern)
inline virtual

Given a skipgram, returns patterns in the model which would instantiate the skipgram if inserted into the gaps. For skipgrams with multiple gaps, these skip content patterns are themselves skipgrams. Skipgram and skip content complement each other.

Returns
A relation map

Reimplemented from PatternModel< IndexedData, IndexedDataHandler, MapType, PatternType >.

template<class MapType = PatternMap<IndexedData,IndexedDataHandler>, class PatternType = Pattern>
t_relationmap IndexedPatternModel< MapType, PatternType >::getsubchildren ( const PatternPointer & pattern,
unsigned int  occurrencethreshold = 0,
int  category = 0,
unsigned int  size = 0 
)
inline

Returns all patterns in the model that are subsumed by the specified pattern. Subsumed patterns are smaller than the subsuming pattern. Every n-gram (except unigram) by definition subsumes two n-1-grams.

Parameters
occurrencethreshold: If set above zero, filters to only include patterns occurring above this threshold
category: Set to any value of PatternCategory (NGRAM,SKIPGRAM,FLEXGRAM) to include only this category. Set to 0 for unfiltered (default)
size: Set to any value above zero to only include patterns of the specified length.
Returns
a relation map
template<class MapType = PatternMap<IndexedData,IndexedDataHandler>, class PatternType = Pattern>
t_relationmap IndexedPatternModel< MapType, PatternType >::getsubparents ( const PatternPointer & pattern,
unsigned int  occurrencethreshold = 0,
int  category = 0,
unsigned int  size = 0 
)
inline

Returns all patterns in the model that subsume the specified pattern. Subsuming patterns are larger than the subsumed pattern. Every n-gram (except unigram) by definition subsumes two n-1-grams.

Parameters
occurrencethreshold: If set above zero, filters to only include patterns occurring above this threshold
category: Set to any value of PatternCategory (NGRAM,SKIPGRAM,FLEXGRAM) to include only this category. Set to 0 for unfiltered (default)
size: Set to any value above zero to only include patterns of the specified length.
Returns
a relation map
template<class MapType = PatternMap<IndexedData,IndexedDataHandler>, class PatternType = Pattern>
t_relationmap IndexedPatternModel< MapType, PatternType >::gettemplates ( const Pattern & pattern,
unsigned int  occurrencethreshold = 0 
)
inline

Returns all skipgrams and flexgrams in the model that are an abstraction of the specified pattern. Pattern itself may be a skipgram too. An optional occurrence threshold may be used to filter.

Returns
a relation map
template<class MapType = PatternMap<IndexedData,IndexedDataHandler>, class PatternType = Pattern>
void IndexedPatternModel< MapType, PatternType >::info ( std::ostream *  OUT)
inline

Output information about the model to the output stream, includes some statistics and technical details such as space requirements.

template<class MapType = PatternMap<IndexedData,IndexedDataHandler>, class PatternType = Pattern>
double IndexedPatternModel< MapType, PatternType >::npmi ( const PatternPointer & key1,
const PatternPointer & key2,
int  jointcount 
)
inline

Compute normalised pointwise mutual information given two patterns and their joint occurrence count.

template<class MapType = PatternMap<IndexedData,IndexedDataHandler>, class PatternType = Pattern>
void IndexedPatternModel< MapType, PatternType >::outputcooc ( std::ostream *  OUT,
ClassDecoder & classdecoder,
double  threshold
)
inline virtual

Compute and output co-occurrence relations as joint occurrence count

Parameters
threshold: Normalised Pointwise Mutual Information threshold

Reimplemented from PatternModel< IndexedData, IndexedDataHandler, MapType, PatternType >.

template<class MapType = PatternMap<IndexedData,IndexedDataHandler>, class PatternType = Pattern>
void IndexedPatternModel< MapType, PatternType >::outputcooc_npmi ( std::ostream *  OUT,
ClassDecoder & classdecoder,
double  threshold
)
inline virtual

Compute and output co-occurrence relations as Normalised Pointwise Mutual Information

Parameters
threshold: Normalised Pointwise Mutual Information threshold

Reimplemented from PatternModel< IndexedData, IndexedDataHandler, MapType, PatternType >.

template<class MapType = PatternMap<IndexedData,IndexedDataHandler>, class PatternType = Pattern>
void IndexedPatternModel< MapType, PatternType >::outputrelations ( const PatternPointer & pattern,
t_relationmap & relations,
ClassDecoder & classdecoder,
std::ostream *  OUT,
const std::string  label = "RELATED-TO" 
)
inline

Output the specified relation map for the specified pattern to output stream. Low-level function.

Parameters
pattern: The pattern
relations: A relation map
classdecoder: A class decoder
OUT: The output stream
label: A label to insert between relations (defaults to: RELATED-TO)
template<class MapType = PatternMap<IndexedData,IndexedDataHandler>, class PatternType = Pattern>
void IndexedPatternModel< MapType, PatternType >::outputrelations ( const PatternPointer & pattern,
ClassDecoder & classdecoder,
std::ostream *  OUT,
bool  outputheader = true 
)
inline

Compute and output all possible relations for a given pattern. High-level function.

Parameters
pattern: The pattern
classdecoder: A class decoder
OUT: The output stream
outputheader: Output a header (default: true)
template<class MapType = PatternMap<IndexedData,IndexedDataHandler>, class PatternType = Pattern>
virtual void IndexedPatternModel< MapType, PatternType >::postread ( const PatternModelOptions  options)
inline protected virtual
template<class MapType = PatternMap<IndexedData,IndexedDataHandler>, class PatternType = Pattern>
virtual void IndexedPatternModel< MapType, PatternType >::posttrain ( const PatternModelOptions  options)
inline protected virtual
template<class MapType = PatternMap<IndexedData,IndexedDataHandler>, class PatternType = Pattern>
void IndexedPatternModel< MapType, PatternType >::print ( std::ostream *  out,
ClassDecoder & decoder
)
inline virtual

Print the contents of the pattern model, i.e. all patterns and associated counts, to the output stream.

Parameters
out: The output stream
decoder: The class decoder to use

Reimplemented from PatternModel< IndexedData, IndexedDataHandler, MapType, PatternType >.

template<class MapType = PatternMap<IndexedData,IndexedDataHandler>, class PatternType = Pattern>
void IndexedPatternModel< MapType, PatternType >::print ( std::ostream *  out,
ClassDecoder & decoder,
const PatternPointer & pattern,
bool  endline = true 
)
inline
template<class MapType = PatternMap<IndexedData,IndexedDataHandler>, class PatternType = Pattern>
void IndexedPatternModel< MapType, PatternType >::prunerelations ( t_relationmap & relations,
unsigned int  occurrencethreshold 
)
inline

Given a relation map, prune relations below the specified occurrence threshold

Parameters
relations: The relationmap to manipulate
occurrencethreshold: The occurrence threshold
template<class MapType = PatternMap<IndexedData,IndexedDataHandler>, class PatternType = Pattern>
int IndexedPatternModel< MapType, PatternType >::pruneskipgrams ( int  threshold,
int  minskiptypes,
int  _n = 0 
)
inline

Prune skipgrams based on an occurrence threshold, and a skiptype threshold. The latter enforces that at least the specified number of distinct patterns must fit in the gap for the skipgram to be retained.

Parameters
_n: Set to any value above zero to only include patterns of the specified length.
template<class MapType = PatternMap<IndexedData,IndexedDataHandler>, class PatternType = Pattern>
virtual void IndexedPatternModel< MapType, PatternType >::train ( std::istream *  in,
PatternModelOptions  options,
PatternModelInterface * constrainbymodel = NULL,
bool  continued = false,
uint32_t  firstsentence = 1,
bool  ignoreerrors = false 
)
inline virtual

Train a pattern model on corpus data (given an input stream)

Parameters
in: The input stream of the corpus data (*.colibri.dat), may be NULL if a reverse index is loaded.
options: Options for training
constrainbymodel: Pointer to another pattern model which should be used to constrain the training of this one, only patterns also occurring in the other model will be included. Defaults to NULL (no constraining)
continued: Continued training on the same corpus data
firstsentence: First sentence index, useful for augmenting a model with another corpus (keep continued set to false in this case), defaults to 1
ignoreerrors: Try to ignore errors (use for debug only)

Reimplemented from PatternModel< IndexedData, IndexedDataHandler, MapType, PatternType >.

template<class MapType = PatternMap<IndexedData,IndexedDataHandler>, class PatternType = Pattern>
virtual void IndexedPatternModel< MapType, PatternType >::train ( const std::string &  filename,
PatternModelOptions  options,
PatternModelInterface * constrainbymodel = NULL,
bool  continued = false,
uint32_t  firstsentence = 1,
bool  ignoreerrors = false 
)
inline virtual

Train a pattern model on corpus data

Parameters
filename: The filename of the corpus data (*.colibri.dat)
options: Options for training
constrainbymodel: Pointer to another pattern model which should be used to constrain the training of this one, only patterns also occurring in the other model will be included. Defaults to NULL (no constraining)

Reimplemented from PatternModel< IndexedData, IndexedDataHandler, MapType, PatternType >.

template<class MapType = PatternMap<IndexedData,IndexedDataHandler>, class PatternType = Pattern>
virtual void IndexedPatternModel< MapType, PatternType >::trainskipgrams ( PatternModelOptions  options,
PatternModelInterface * constrainbymodel = NULL
)
inline virtual

Train skipgrams, for indexed models only

Parameters
options: Pattern model options
constrainbymodel: Pointer to a pattern model to use as constraint: only include skipgrams that occur in the constraint model (default: NULL)

Reimplemented from PatternModel< IndexedData, IndexedDataHandler, MapType, PatternType >.


The documentation for this class was generated from the following file: