Colibri Core
Public Member Functions | List of all members
PatternPointerModel< ValueType, ValueHandler, MapType > Class Template Reference

#include <patternmodel.h>

Inheritance diagram for PatternPointerModel< ValueType, ValueHandler, MapType >:
PatternModel< ValueType, ValueHandler, MapType, PatternPointer > PatternModelInterface PatternStoreInterface PatternStoreInterface

Public Member Functions

 PatternPointerModel (IndexedCorpus *corpus)
 
 PatternPointerModel (std::istream *f, const PatternModelOptions options, PatternModelInterface *constrainmodel=NULL, IndexedCorpus *corpus=NULL)
 
 PatternPointerModel (const std::string filename, const PatternModelOptions options, PatternModelInterface *constrainmodel=NULL, IndexedCorpus *corpus=NULL)
 
int getmodeltype () const
 
int getmodelversion () const
 
virtual void add (const PatternPointer &patternpointer, const IndexReference &ref)
 
virtual void add (const PatternPointer &pattern, ValueType *value, const IndexReference &ref)
 
- Public Member Functions inherited from PatternModel< ValueType, ValueHandler, MapType, PatternPointer >
 PatternModel (IndexedCorpus *corpus=NULL)
 
 PatternModel (std::istream *f, PatternModelOptions options, PatternModelInterface *constrainmodel=NULL, IndexedCorpus *corpus=NULL)
 
 PatternModel (const std::string &filename, const PatternModelOptions &options, PatternModelInterface *constrainmodel=NULL, IndexedCorpus *corpus=NULL)
 
 ~PatternModel ()
 
virtual size_t size () const
 
virtual bool has (const Pattern &pattern) const
 
virtual bool has (const PatternPointer &pattern) const
 
virtual void load (std::string &filename, const PatternModelOptions &options, PatternModelInterface *constrainmodel=NULL)
 
virtual void load (std::istream *f, const PatternModelOptions &options, PatternModelInterface *constrainmodel=NULL)
 
PatternModelInterface * getinterface ()
 
virtual void train (std::istream *in, PatternModelOptions options, PatternModelInterface *constrainbymodel=NULL, bool continued=false, uint32_t firstsentence=1, bool ignoreerrors=false)
 
virtual void train (const std::string &filename, PatternModelOptions options, PatternModelInterface *constrainbymodel=NULL, bool continued=false, uint32_t firstsentence=1, bool ignoreerrors=false)
 
virtual int computeskipgrams (const PatternPointer &pattern, int mintokens=2, const IndexReference *singleref=NULL, const IndexedData *multiplerefs=NULL, PatternModelInterface *constrainbymodel=NULL, std::vector< PatternPointer > *targetcontainer=NULL, const bool exhaustive=false, const int maxskips=3, const bool DEBUG=false)
 
virtual int computeskipgrams (const PatternPointer &pattern, PatternModelOptions &options, const IndexReference *singleref=NULL, const IndexedData *multiplerefs=NULL, PatternModelInterface *constrainbymodel=NULL, const bool exhaustive=false)
 
virtual std::vector< PatternPointer > findskipgrams (const PatternPointer &pattern, unsigned int occurrencethreshold=1, int maxskips=3)
 
virtual void trainskipgrams (const PatternModelOptions options, PatternModelInterface *constrainbymodel=NULL)
 
void test (MapType &target, std::istream *in)
 
void write (std::ostream *out)
 
void write (const std::string filename)
 
virtual int maxlength () const
 
virtual int minlength () const
 
virtual unsigned int occurrencecount (const Pattern &pattern)
 
virtual unsigned int occurrencecount (const PatternPointer &pattern)
 
virtual ValueType * getdata (const Pattern &pattern, bool makeifnew=false)
 
virtual ValueType * getdata (const PatternPointer &pattern, bool makeifnew=false)
 
virtual unsigned int types ()
 
virtual unsigned int tokens () const
 
unsigned char type () const
 
unsigned char version () const
 
void output (std::ostream *)
 
unsigned int coveragecount (const Pattern &key)
 
double coverage (const Pattern &key)
 
std::vector< PatternPointer > getreverseindex (const IndexReference ref, int occurrencecount=0, int category=0, unsigned int size=0)
 
std::vector< std::pair< IndexReference, PatternPointer > > getreverseindex_bysentence (int sentence)
 
std::vector< std::pair< IndexReference, PatternPointer > > getreverseindex_right (const IndexReference ref)
 
std::vector< std::pair< IndexReference, PatternPointer > > getreverseindex_left (const IndexReference ref)
 
void computestats ()
 
virtual void resetstats ()
 
virtual void computecoveragestats (int category=0, int n=0)
 
unsigned int totaloccurrencesingroup (int category, int n)
 
unsigned int totalpatternsingroup (int category, int n)
 
unsigned int totalwordtypesingroup (int category, int n)
 
unsigned int totaltokensingroup (int category, int n)
 
double frequency (const Pattern &pattern)
 
virtual void add (const Pattern &pattern, ValueType *value, const IndexReference &ref)
 
unsigned int prune (int threshold, int _n=0)
 
virtual unsigned int pruneskipgrams (unsigned int threshold, int minskiptypes=2, int _n=0)
 
unsigned int prunenotinset (const std::unordered_set< Pattern > &s, int _n)
 
unsigned int prunebymodel (PatternModel< ValueType2, ValueHandler2, MapType2 > &secondmodel)
 
std::vector< std::pair< Pattern, int > > getpatterns (const Pattern &pattern)
 
virtual void print (std::ostream *out, ClassDecoder &decoder)
 
virtual void print (std::ostream *out, ClassDecoder &decoder, const PatternPointer &pattern, bool endline=true)
 
virtual void printreverseindex (std::ostream *out, ClassDecoder &decoder)
 
void printmodel (std::ostream *out, ClassDecoder &decoder)
 
void printpattern (std::ostream *out, ClassDecoder &decoder, const Pattern &pattern, bool endline=true)
 
void histogram (std::map< unsigned int, unsigned int > &hist, unsigned int threshold=0, unsigned int cap=0, int category=0, unsigned int size=0)
 
void histogram (std::ostream *OUT, unsigned int threshold=0, unsigned int cap=0, int category=0, unsigned int size=0)
 
unsigned int topthreshold (int amount, int category=0, int size=0)
 
void info (std::ostream *OUT)
 
void report (std::ostream *OUT)
 
PatternSet< uint64_t > extractset (int minlength=1, int maxlength=1)
 
virtual void outputrelations (const Pattern &pattern, ClassDecoder &classdecoder, std::ostream *OUT)
 
virtual t_relationmap getsubchildren (const Pattern &pattern, int=0, int=0, int=0)
 
virtual t_relationmap getsubparents (const Pattern &pattern, int=0, int=0, int=0)
 
virtual t_relationmap gettemplates (const Pattern &pattern, int=0)
 
virtual t_relationmap getinstances (const Pattern &pattern, int=0)
 
virtual t_relationmap getskipcontent (const PatternPointer &pattern)
 
virtual t_relationmap getleftneighbours (const Pattern &pattern, int=0, int=0, int=0, int=0)
 
virtual t_relationmap getrightneighbours (const Pattern &pattern, int=0, int=0, int=0, int=0)
 
virtual t_relationmap_double getnpmi (const Pattern &pattern, double threshold)
 
virtual int computeflexgrams_fromskipgrams ()
 
virtual int computeflexgrams_fromcooc ()
 
virtual void outputcooc_npmi (std::ostream *OUT, ClassDecoder &classdecoder, double threshold)
 
virtual void outputcooc (std::ostream *OUT, ClassDecoder &classdecoder, double threshold)
 
- Public Member Functions inherited from PatternModelInterface
virtual PatternStoreInterface * getstoreinterface ()
 
virtual PatternStoreInterface * getstoreinterface ()
 

Additional Inherited Members

- Public Types inherited from PatternModel< ValueType, ValueHandler, MapType, PatternPointer >
typedef MapType::iterator iterator
 
typedef MapType::const_iterator const_iterator
 
- Public Attributes inherited from PatternModel< ValueType, ValueHandler, MapType, PatternPointer >
IndexedCorpus * reverseindex
 Pointer to the reverse index and corpus data for this model (or NULL) More...
 
bool reverseindex_internal
 
bool hasskipgrams
 Does this model have skipgrams? More...
 
- Protected Member Functions inherited from PatternModel< ValueType, ValueHandler, MapType, PatternPointer >
virtual void postread (const PatternModelOptions options)
 
virtual void posttrain (const PatternModelOptions options)
 
- Protected Attributes inherited from PatternModel< ValueType, ValueHandler, MapType, PatternPointer >
unsigned char model_type
 
unsigned char model_version
 
uint64_t totaltokens
 Total number of tokens in the original corpus, so INCLUDES TOKENS NOT COVERED BY THE MODEL! More...
 
uint64_t totaltypes
 Total number of unigram/word types in the original corpus, SO INCLUDING NOT COVERED BY THE MODEL! More...
 
int maxn
 
int minn
 
std::set< int > cache_categories
 
std::set< int > cache_n
 
std::map< int, std::map< int, unsigned int > > cache_grouptotal
 total occurrences (used for frequency computation, within a group) More...
 
std::map< int, std::map< int, unsigned int > > cache_grouptotalpatterns
 total distinct patterns per group More...
 
std::map< int, std::map< int, unsigned int > > cache_grouptotalwordtypes
 total covered word types per group More...
 
std::map< int, std::map< int, unsigned int > > cache_grouptotaltokens
 total covered tokens per group More...
 
std::map< int, std::vector< uint32_t > > gapmasks
 pre-computed masks representing possible gap configurations for various pattern lengths More...
 

Constructor & Destructor Documentation

template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternPointerMap<ValueType, BaseValueHandler<ValueType>>>
PatternPointerModel< ValueType, ValueHandler, MapType >::PatternPointerModel ( IndexedCorpus * corpus)
inline
template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternPointerMap<ValueType, BaseValueHandler<ValueType>>>
PatternPointerModel< ValueType, ValueHandler, MapType >::PatternPointerModel ( std::istream *  f,
const PatternModelOptions  options,
PatternModelInterface * constrainmodel = NULL,
IndexedCorpus * corpus = NULL 
)
inline

Read a pattern model from an input stream

Parameters
f: The input stream
options: Options for reading; these act as a filter for the data, allowing you to raise thresholds etc.
constrainmodel: Pointer to another pattern model which should be used to constrain the loading of this one; only patterns also occurring in the other model will be included. Defaults to NULL (no constraining).
corpus: Pointer to the loaded corpus, used as a reverse index.
template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternPointerMap<ValueType, BaseValueHandler<ValueType>>>
PatternPointerModel< ValueType, ValueHandler, MapType >::PatternPointerModel ( const std::string  filename,
const PatternModelOptions  options,
PatternModelInterface * constrainmodel = NULL,
IndexedCorpus * corpus = NULL 
)
inline

Read a pattern model from file

Parameters
filename: The filename
options: Options for reading; these act as a filter for the data, allowing you to raise thresholds etc.
constrainmodel: Pointer to another pattern model which should be used to constrain the loading of this one; only patterns also occurring in the other model will be included. Defaults to NULL (no constraining).
corpus: Pointer to the loaded corpus, used as a reverse index.

Member Function Documentation

template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternPointerMap<ValueType, BaseValueHandler<ValueType>>>
virtual void PatternPointerModel< ValueType, ValueHandler, MapType >::add ( const PatternPointer & patternpointer,
const IndexReference & ref 
)
inline virtual

Add a pattern, with a given position, to the model. This is called during training every time an instance of a pattern is found in the data. This is the high-level version.

Parameters
patternpointer: The pattern to add (a patternpointer)
ref: The position in the corpus where the pattern occurs

Reimplemented from PatternModel< ValueType, ValueHandler, MapType, PatternPointer >.

template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternPointerMap<ValueType, BaseValueHandler<ValueType>>>
virtual void PatternPointerModel< ValueType, ValueHandler, MapType >::add ( const PatternPointer & pattern,
ValueType *  value,
const IndexReference & ref 
)
inline virtual
template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternPointerMap<ValueType, BaseValueHandler<ValueType>>>
int PatternPointerModel< ValueType, ValueHandler, MapType >::getmodeltype ( ) const
inline virtual

Returns the type of model (a value from ModelType)

Reimplemented from PatternModel< ValueType, ValueHandler, MapType, PatternPointer >.

template<class ValueType, class ValueHandler = BaseValueHandler<ValueType>, class MapType = PatternPointerMap<ValueType, BaseValueHandler<ValueType>>>
int PatternPointerModel< ValueType, ValueHandler, MapType >::getmodelversion ( ) const
inline virtual

Returns the version of the model implementation and binary serialisation format

Reimplemented from PatternModel< ValueType, ValueHandler, MapType, PatternPointer >.


The documentation for this class was generated from the following file: