Colibri Core
Public Member Functions | List of all members
IndexedPatternPointerModel< MapType > Class Template Reference

#include <patternmodel.h>

Inheritance diagram for IndexedPatternPointerModel< MapType >:
IndexedPatternModel< MapType, PatternPointer > PatternModel< IndexedData, IndexedDataHandler, MapType, PatternPointer > PatternModelInterface PatternStoreInterface PatternStoreInterface

Public Member Functions

 IndexedPatternPointerModel (IndexedCorpus *corpus)
 
 IndexedPatternPointerModel (std::istream *f, const PatternModelOptions options, PatternModelInterface *constrainmodel=NULL, IndexedCorpus *corpus=NULL)
 
 IndexedPatternPointerModel (const std::string filename, const PatternModelOptions options, PatternModelInterface *constrainmodel=NULL, IndexedCorpus *corpus=NULL)
 
int getmodeltype () const
 
int getmodelversion () const
 
void add (const PatternPointer &patternpointer, const IndexReference &ref)
 
void add (const PatternPointer &patternpointer, IndexedData *value, const IndexReference &ref)
 
- Public Member Functions inherited from IndexedPatternModel< MapType, PatternPointer >
 IndexedPatternModel (IndexedCorpus *corpus=NULL)
 
 IndexedPatternModel (std::istream *f, const PatternModelOptions options, PatternModelInterface *constrainmodel=NULL, IndexedCorpus *corpus=NULL)
 
 IndexedPatternModel (const std::string filename, const PatternModelOptions options, PatternModelInterface *constrainmodel=NULL, IndexedCorpus *corpus=NULL)
 
virtual ~IndexedPatternModel ()
 
int getmodeltype () const
 
int getmodelversion () const
 
virtual void add (const Pattern &pattern, IndexedData *value, const IndexReference &ref)
 
IndexedData * getdata (const Pattern &pattern, bool makeifnew=false)
 
IndexedData * getdata (const PatternPointer &pattern, bool makeifnew=false)
 
virtual void train (std::istream *in, PatternModelOptions options, PatternModelInterface *constrainbymodel=NULL, bool continued=false, uint32_t firstsentence=1, bool ignoreerrors=false)
 
virtual void train (const std::string &filename, PatternModelOptions options, PatternModelInterface *constrainbymodel=NULL, bool continued=false, uint32_t firstsentence=1, bool ignoreerrors=false)
 
void info (std::ostream *OUT)
 
void print (std::ostream *out, ClassDecoder &decoder)
 
void print (std::ostream *out, ClassDecoder &decoder, const PatternPointer &pattern, bool endline=true)
 
virtual void trainskipgrams (PatternModelOptions options, PatternModelInterface *constrainbymodel=NULL)
 
Pattern getpatternfromtoken (IndexReference ref)
 
t_relationmap getskipcontent (const PatternPointer &pattern)
 
void prunerelations (t_relationmap &relations, unsigned int occurrencethreshold)
 
t_relationmap gettemplates (const Pattern &pattern, unsigned int occurrencethreshold=0)
 
t_relationmap getinstances (const Pattern &pattern, unsigned int occurrencethreshold=0)
 
t_relationmap getsubchildren (const PatternPointer &pattern, unsigned int occurrencethreshold=0, int category=0, unsigned int size=0)
 
t_relationmap getsubparents (const PatternPointer &pattern, unsigned int occurrencethreshold=0, int category=0, unsigned int size=0)
 
t_relationmap getleftneighbours (const PatternPointer &pattern, unsigned int occurrencethreshold=0, int category=0, unsigned int size=0, unsigned int cutoff=0)
 
t_relationmap getrightneighbours (const PatternPointer &pattern, unsigned int occurrencethreshold=0, int category=0, unsigned int size=0, unsigned int cutoff=0)
 
int pruneskipgrams (int threshold, int minskiptypes, int _n=0)
 
virtual void computecoveragestats (int category=0, int n=0)
 
t_relationmap getrightcooc (const PatternPointer &pattern, unsigned int occurrencethreshold=0, int category=0, unsigned int size=0, IndexedData *matches=NULL)
 
t_relationmap getleftcooc (const PatternPointer &pattern, unsigned int occurrencethreshold=0, int category=0, unsigned int size=0)
 
t_relationmap getcooc (const PatternPointer &pattern, unsigned int occurrencethreshold=0, int category=0, unsigned int size=0, bool ordersignificant=false)
 
double npmi (const PatternPointer &key1, const PatternPointer &key2, int jointcount)
 
void outputrelations (const PatternPointer &pattern, t_relationmap &relations, ClassDecoder &classdecoder, std::ostream *OUT, const std::string label="RELATED-TO")
 
void outputrelations (const PatternPointer &pattern, ClassDecoder &classdecoder, std::ostream *OUT, bool outputheader=true)
 
void computenpmi (std::map< PatternPointer, t_relationmap_double > &coocmap, double threshold, bool right=true, bool left=true)
 
void computecooc (std::map< PatternPointer, t_relationmap > &coocmap, int threshold, bool right=true, bool left=true)
 
int computeflexgrams_fromskipgrams ()
 
int computeflexgrams_fromcooc (double threshold)
 
void outputcooc_npmi (std::ostream *OUT, ClassDecoder &classdecoder, double threshold)
 
void outputcooc (std::ostream *OUT, ClassDecoder &classdecoder, double threshold)
 
int flexgramsize (const Pattern &pattern, IndexReference begin)
 
- Public Member Functions inherited from PatternModel< IndexedData, IndexedDataHandler, MapType, PatternPointer >
 PatternModel (IndexedCorpus *corpus=NULL)
 
 PatternModel (std::istream *f, PatternModelOptions options, PatternModelInterface *constrainmodel=NULL, IndexedCorpus *corpus=NULL)
 
 PatternModel (const std::string &filename, const PatternModelOptions &options, PatternModelInterface *constrainmodel=NULL, IndexedCorpus *corpus=NULL)
 
 ~PatternModel ()
 
virtual size_t size () const
 
virtual bool has (const Pattern &pattern) const
 
virtual bool has (const PatternPointer &pattern) const
 
virtual void load (std::string &filename, const PatternModelOptions &options, PatternModelInterface *constrainmodel=NULL)
 
virtual void load (std::istream *f, const PatternModelOptions &options, PatternModelInterface *constrainmodel=NULL)
 
PatternModelInterface * getinterface ()
 
virtual int computeskipgrams (const PatternPointer &pattern, int mintokens=2, const IndexReference *singleref=NULL, const IndexedData *multiplerefs=NULL, PatternModelInterface *constrainbymodel=NULL, std::vector< PatternPointer > *targetcontainer=NULL, const bool exhaustive=false, const int maxskips=3, const bool DEBUG=false)
 
virtual int computeskipgrams (const PatternPointer &pattern, PatternModelOptions &options, const IndexReference *singleref=NULL, const IndexedData *multiplerefs=NULL, PatternModelInterface *constrainbymodel=NULL, const bool exhaustive=false)
 
virtual std::vector< PatternPointer > findskipgrams (const PatternPointer &pattern, unsigned int occurrencethreshold=1, int maxskips=3)
 
void test (MapType &target, std::istream *in)
 
void write (std::ostream *out)
 
void write (const std::string filename)
 
virtual int maxlength () const
 
virtual int minlength () const
 
virtual unsigned int occurrencecount (const Pattern &pattern)
 
virtual unsigned int occurrencecount (const PatternPointer &pattern)
 
virtual unsigned int types ()
 
virtual unsigned int tokens () const
 
unsigned char type () const
 
unsigned char version () const
 
void output (std::ostream *)
 
unsigned int coveragecount (const Pattern &key)
 
double coverage (const Pattern &key)
 
std::vector< PatternPointer > getreverseindex (const IndexReference ref, int occurrencecount=0, int category=0, unsigned int size=0)
 
std::vector< std::pair< IndexReference, PatternPointer > > getreverseindex_bysentence (int sentence)
 
std::vector< std::pair< IndexReference, PatternPointer > > getreverseindex_right (const IndexReference ref)
 
std::vector< std::pair< IndexReference, PatternPointer > > getreverseindex_left (const IndexReference ref)
 
void computestats ()
 
virtual void resetstats ()
 
unsigned int totaloccurrencesingroup (int category, int n)
 
unsigned int totalpatternsingroup (int category, int n)
 
unsigned int totalwordtypesingroup (int category, int n)
 
unsigned int totaltokensingroup (int category, int n)
 
double frequency (const Pattern &pattern)
 
unsigned int prune (int threshold, int _n=0)
 
virtual unsigned int pruneskipgrams (unsigned int threshold, int minskiptypes=2, int _n=0)
 
unsigned int prunenotinset (const std::unordered_set< Pattern > &s, int _n)
 
unsigned int prunebymodel (PatternModel< ValueType2, ValueHandler2, MapType2 > &secondmodel)
 
std::vector< std::pair< Pattern, int > > getpatterns (const Pattern &pattern)
 
virtual void printreverseindex (std::ostream *out, ClassDecoder &decoder)
 
void printmodel (std::ostream *out, ClassDecoder &decoder)
 
void printpattern (std::ostream *out, ClassDecoder &decoder, const Pattern &pattern, bool endline=true)
 
void histogram (std::map< unsigned int, unsigned int > &hist, unsigned int threshold=0, unsigned int cap=0, int category=0, unsigned int size=0)
 
void histogram (std::ostream *OUT, unsigned int threshold=0, unsigned int cap=0, int category=0, unsigned int size=0)
 
unsigned int topthreshold (int amount, int category=0, int size=0)
 
void info (std::ostream *OUT)
 
void report (std::ostream *OUT)
 
PatternSet< uint64_t > extractset (int minlength=1, int maxlength=1)
 
virtual void outputrelations (const Pattern &pattern, ClassDecoder &classdecoder, std::ostream *OUT)
 
virtual t_relationmap getsubchildren (const Pattern &pattern, int=0, int=0, int=0)
 
virtual t_relationmap getsubparents (const Pattern &pattern, int=0, int=0, int=0)
 
virtual t_relationmap gettemplates (const Pattern &pattern, int=0)
 
virtual t_relationmap getinstances (const Pattern &pattern, int=0)
 
virtual t_relationmap getleftneighbours (const Pattern &pattern, int=0, int=0, int=0, int=0)
 
virtual t_relationmap getrightneighbours (const Pattern &pattern, int=0, int=0, int=0, int=0)
 
virtual t_relationmap_double getnpmi (const Pattern &pattern, double threshold)
 
virtual int computeflexgrams_fromcooc ()
 
- Public Member Functions inherited from PatternModelInterface
virtual PatternStoreInterface * getstoreinterface ()
 
virtual PatternStoreInterface * getstoreinterface ()
 

Additional Inherited Members

- Public Types inherited from PatternModel< IndexedData, IndexedDataHandler, MapType, PatternPointer >
typedef MapType::iterator iterator
 
typedef MapType::const_iterator const_iterator
 
- Public Attributes inherited from PatternModel< IndexedData, IndexedDataHandler, MapType, PatternPointer >
IndexedCorpus * reverseindex
 Pointer to the reverse index and corpus data for this model (or NULL) More...
 
bool reverseindex_internal
 
bool hasskipgrams
 Does this model have skipgrams? More...
 
- Protected Member Functions inherited from IndexedPatternModel< MapType, PatternPointer >
virtual void postread (const PatternModelOptions options)
 
virtual void posttrain (const PatternModelOptions options)
 
- Protected Attributes inherited from PatternModel< IndexedData, IndexedDataHandler, MapType, PatternPointer >
unsigned char model_type
 
unsigned char model_version
 
uint64_t totaltokens
 Total number of tokens in the original corpus, so INCLUDES TOKENS NOT COVERED BY THE MODEL! More...
 
uint64_t totaltypes
 Total number of unigram/word types in the original corpus, so INCLUDES TYPES NOT COVERED BY THE MODEL! More...
 
int maxn
 
int minn
 
std::set< int > cache_categories
 
std::set< int > cache_n
 
std::map< int, std::map< int, unsigned int > > cache_grouptotal
 total occurrences (used for frequency computation, within a group) More...
 
std::map< int, std::map< int, unsigned int > > cache_grouptotalpatterns
 total distinct patterns per group More...
 
std::map< int, std::map< int, unsigned int > > cache_grouptotalwordtypes
 total covered word types per group More...
 
std::map< int, std::map< int, unsigned int > > cache_grouptotaltokens
 total covered tokens per group More...
 
std::map< int, std::vector< uint32_t > > gapmasks
 pre-computed masks representing possible gap configurations for various pattern lengths More...
 

Constructor & Destructor Documentation

template<class MapType = PatternPointerMap<IndexedData, IndexedDataHandler>>
IndexedPatternPointerModel< MapType >::IndexedPatternPointerModel ( IndexedCorpus * corpus)
inline
template<class MapType = PatternPointerMap<IndexedData, IndexedDataHandler>>
IndexedPatternPointerModel< MapType >::IndexedPatternPointerModel ( std::istream *  f,
const PatternModelOptions  options,
PatternModelInterface * constrainmodel = NULL,
IndexedCorpus * corpus = NULL 
)
inline

Read a pattern model from an input stream

Parameters
f: The input stream
options: Options for reading; these act as a filter for the data, allowing you to raise thresholds etc.
constrainmodel: Pointer to another pattern model which should be used to constrain the loading of this one; only patterns also occurring in the other model will be included. Defaults to NULL (no constraining).
corpus: Pointer to the loaded corpus, used as a reverse index.
template<class MapType = PatternPointerMap<IndexedData, IndexedDataHandler>>
IndexedPatternPointerModel< MapType >::IndexedPatternPointerModel ( const std::string  filename,
const PatternModelOptions  options,
PatternModelInterface * constrainmodel = NULL,
IndexedCorpus * corpus = NULL 
)
inline

Read a pattern model from file

Parameters
filename: The filename
options: Options for reading; these act as a filter for the data, allowing you to raise thresholds etc.
constrainmodel: Pointer to another pattern model which should be used to constrain the loading of this one; only patterns also occurring in the other model will be included. Defaults to NULL (no constraining).
corpus: Pointer to the loaded corpus, used as a reverse index.

Member Function Documentation

template<class MapType = PatternPointerMap<IndexedData, IndexedDataHandler>>
void IndexedPatternPointerModel< MapType >::add ( const PatternPointer & patternpointer,
const IndexReference & ref 
)
inline virtual

Add a pattern, with a given position and a value, to the model. This is called during training every time an instance of a pattern is found in the data.

Parameters
pattern: The pattern to add
value: A pointer to the value for this pattern; set to NULL and it will be automatically determined
IndexReference: The position in the corpus where the pattern occurs

Reimplemented from PatternModel< IndexedData, IndexedDataHandler, MapType, PatternPointer >.

template<class MapType = PatternPointerMap<IndexedData, IndexedDataHandler>>
void IndexedPatternPointerModel< MapType >::add ( const PatternPointer & patternpointer,
IndexedData * value,
const IndexReference & ref 
)
inline virtual
template<class MapType = PatternPointerMap<IndexedData, IndexedDataHandler>>
int IndexedPatternPointerModel< MapType >::getmodeltype ( ) const
inline virtual

Returns the type of model (a value from ModelType)

Reimplemented from PatternModel< IndexedData, IndexedDataHandler, MapType, PatternPointer >.

template<class MapType = PatternPointerMap<IndexedData, IndexedDataHandler>>
int IndexedPatternPointerModel< MapType >::getmodelversion ( ) const
inline virtual

Returns the version of the model implementation and binary serialisation format

Reimplemented from PatternModel< IndexedData, IndexedDataHandler, MapType, PatternPointer >.


The documentation for this class was generated from the following file: