|
| PatternPointerModel (IndexedCorpus *corpus) |
|
| PatternPointerModel (std::istream *f, const PatternModelOptions options, PatternModelInterface *constrainmodel=NULL, IndexedCorpus *corpus=NULL) |
|
| PatternPointerModel (const std::string filename, const PatternModelOptions options, PatternModelInterface *constrainmodel=NULL, IndexedCorpus *corpus=NULL) |
|
int | getmodeltype () const |
|
int | getmodelversion () const |
|
virtual void | add (const PatternPointer &patternpointer, const IndexReference &ref) |
|
virtual void | add (const PatternPointer &pattern, ValueType *value, const IndexReference &ref) |
|
| PatternModel (IndexedCorpus *corpus=NULL) |
|
| PatternModel (std::istream *f, PatternModelOptions options, PatternModelInterface *constrainmodel=NULL, IndexedCorpus *corpus=NULL) |
|
| PatternModel (const std::string &filename, const PatternModelOptions &options, PatternModelInterface *constrainmodel=NULL, IndexedCorpus *corpus=NULL) |
|
| ~PatternModel () |
|
virtual size_t | size () const |
|
virtual bool | has (const Pattern &pattern) const |
|
virtual bool | has (const PatternPointer &pattern) const |
|
virtual void | load (std::string &filename, const PatternModelOptions &options, PatternModelInterface *constrainmodel=NULL) |
|
virtual void | load (std::istream *f, const PatternModelOptions &options, PatternModelInterface *constrainmodel=NULL) |
|
PatternModelInterface * | getinterface () |
|
virtual void | train (std::istream *in, PatternModelOptions options, PatternModelInterface *constrainbymodel=NULL, bool continued=false, uint32_t firstsentence=1, bool ignoreerrors=false) |
|
virtual void | train (const std::string &filename, PatternModelOptions options, PatternModelInterface *constrainbymodel=NULL, bool continued=false, uint32_t firstsentence=1, bool ignoreerrors=false) |
|
virtual int | computeskipgrams (const PatternPointer &pattern, int mintokens=2, const IndexReference *singleref=NULL, const IndexedData *multiplerefs=NULL, PatternModelInterface *constrainbymodel=NULL, std::vector< PatternPointer > *targetcontainer=NULL, const bool exhaustive=false, const int maxskips=3, const bool DEBUG=false) |
|
virtual int | computeskipgrams (const PatternPointer &pattern, PatternModelOptions &options, const IndexReference *singleref=NULL, const IndexedData *multiplerefs=NULL, PatternModelInterface *constrainbymodel=NULL, const bool exhaustive=false) |
|
virtual std::vector< PatternPointer > | findskipgrams (const PatternPointer &pattern, unsigned int occurrencethreshold=1, int maxskips=3) |
|
virtual void | trainskipgrams (const PatternModelOptions options, PatternModelInterface *constrainbymodel=NULL) |
|
void | test (MapType &target, std::istream *in) |
|
void | write (std::ostream *out) |
|
void | write (const std::string filename) |
|
virtual int | maxlength () const |
|
virtual int | minlength () const |
|
virtual unsigned int | occurrencecount (const Pattern &pattern) |
|
virtual unsigned int | occurrencecount (const PatternPointer &pattern) |
|
virtual ValueType * | getdata (const Pattern &pattern, bool makeifnew=false) |
|
virtual ValueType * | getdata (const PatternPointer &pattern, bool makeifnew=false) |
|
virtual unsigned int | types () |
|
virtual unsigned int | tokens () const |
|
unsigned char | type () const |
|
unsigned char | version () const |
|
void | output (std::ostream *) |
|
unsigned int | coveragecount (const Pattern &key) |
|
double | coverage (const Pattern &key) |
|
std::vector< PatternPointer > | getreverseindex (const IndexReference ref, int occurrencecount=0, int category=0, unsigned int size=0) |
|
std::vector< std::pair< IndexReference, PatternPointer > > | getreverseindex_bysentence (int sentence) |
|
std::vector< std::pair< IndexReference, PatternPointer > > | getreverseindex_right (const IndexReference ref) |
|
std::vector< std::pair< IndexReference, PatternPointer > > | getreverseindex_left (const IndexReference ref) |
|
void | computestats () |
|
virtual void | resetstats () |
|
virtual void | computecoveragestats (int category=0, int n=0) |
|
unsigned int | totaloccurrencesingroup (int category, int n) |
|
unsigned int | totalpatternsingroup (int category, int n) |
|
unsigned int | totalwordtypesingroup (int category, int n) |
|
unsigned int | totaltokensingroup (int category, int n) |
|
double | frequency (const Pattern &pattern) |
|
virtual void | add (const Pattern &pattern, ValueType *value, const IndexReference &ref) |
|
unsigned int | prune (int threshold, int _n=0) |
|
virtual unsigned int | pruneskipgrams (unsigned int threshold, int minskiptypes=2, int _n=0) |
|
unsigned int | prunenotinset (const std::unordered_set< Pattern > &s, int _n) |
|
unsigned int | prunebymodel (PatternModel< ValueType2, ValueHandler2, MapType2 > &secondmodel) |
|
std::vector< std::pair< Pattern, int > > | getpatterns (const Pattern &pattern) |
|
virtual void | print (std::ostream *out, ClassDecoder &decoder) |
|
virtual void | print (std::ostream *out, ClassDecoder &decoder, const PatternPointer &pattern, bool endline=true) |
|
virtual void | printreverseindex (std::ostream *out, ClassDecoder &decoder) |
|
void | printmodel (std::ostream *out, ClassDecoder &decoder) |
|
void | printpattern (std::ostream *out, ClassDecoder &decoder, const Pattern &pattern, bool endline=true) |
|
void | histogram (std::map< unsigned int, unsigned int > &hist, unsigned int threshold=0, unsigned int cap=0, int category=0, unsigned int size=0) |
|
void | histogram (std::ostream *OUT, unsigned int threshold=0, unsigned int cap=0, int category=0, unsigned int size=0) |
|
unsigned int | topthreshold (int amount, int category=0, int size=0) |
|
void | info (std::ostream *OUT) |
|
void | report (std::ostream *OUT) |
|
PatternSet< uint64_t > | extractset (int minlength=1, int maxlength=1) |
|
virtual void | outputrelations (const Pattern &pattern, ClassDecoder &classdecoder, std::ostream *OUT) |
|
virtual t_relationmap | getsubchildren (const Pattern &pattern, int=0, int=0, int=0) |
|
virtual t_relationmap | getsubparents (const Pattern &pattern, int=0, int=0, int=0) |
|
virtual t_relationmap | gettemplates (const Pattern &pattern, int=0) |
|
virtual t_relationmap | getinstances (const Pattern &pattern, int=0) |
|
virtual t_relationmap | getskipcontent (const PatternPointer &pattern) |
|
virtual t_relationmap | getleftneighbours (const Pattern &pattern, int=0, int=0, int=0, int=0) |
|
virtual t_relationmap | getrightneighbours (const Pattern &pattern, int=0, int=0, int=0, int=0) |
|
virtual t_relationmap_double | getnpmi (const Pattern &pattern, double threshold) |
|
virtual int | computeflexgrams_fromskipgrams () |
|
virtual int | computeflexgrams_fromcooc () |
|
virtual void | outputcooc_npmi (std::ostream *OUT, ClassDecoder &classdecoder, double threshold) |
|
virtual void | outputcooc (std::ostream *OUT, ClassDecoder &classdecoder, double threshold) |
|
virtual PatternStoreInterface * | getstoreinterface () |
|
virtual PatternStoreInterface * | getstoreinterface () |
|
|
typedef MapType::iterator | iterator |
|
typedef MapType::const_iterator | const_iterator |
|
IndexedCorpus * | reverseindex |
| Pointer to the reverse index and corpus data for this model (or NULL) More...
|
|
bool | reverseindex_internal |
|
bool | hasskipgrams |
| Does this model have skipgrams? More...
|
|
virtual void | postread (const PatternModelOptions options) |
|
virtual void | posttrain (const PatternModelOptions options) |
|
unsigned char | model_type |
|
unsigned char | model_version |
|
uint64_t | totaltokens |
| Total number of tokens in the original corpus, so this INCLUDES TOKENS NOT COVERED BY THE MODEL! More...
|
|
uint64_t | totaltypes |
| Total number of unigram/word types in the original corpus, so this INCLUDES TYPES NOT COVERED BY THE MODEL! More...
|
|
int | maxn |
|
int | minn |
|
std::set< int > | cache_categories |
|
std::set< int > | cache_n |
|
std::map< int, std::map< int, unsigned int > > | cache_grouptotal |
| total occurrences (used for frequency computation, within a group) More...
|
|
std::map< int, std::map< int, unsigned int > > | cache_grouptotalpatterns |
| total distinct patterns per group More...
|
|
std::map< int, std::map< int, unsigned int > > | cache_grouptotalwordtypes |
| total covered word types per group More...
|
|
std::map< int, std::map< int, unsigned int > > | cache_grouptotaltokens |
| total covered tokens per group More...
|
|
std::map< int, std::vector< uint32_t > > | gapmasks |
| pre-computed masks representing possible gap configurations for various pattern lengths More...
|
|