|
| | PatternPointerModel (IndexedCorpus *corpus) |
| |
| | PatternPointerModel (std::istream *f, const PatternModelOptions options, PatternModelInterface *constrainmodel=NULL, IndexedCorpus *corpus=NULL) |
| |
| | PatternPointerModel (const std::string filename, const PatternModelOptions options, PatternModelInterface *constrainmodel=NULL, IndexedCorpus *corpus=NULL) |
| |
| int | getmodeltype () const |
| |
| int | getmodelversion () const |
| |
| virtual void | add (const PatternPointer &patternpointer, const IndexReference &ref) |
| |
| virtual void | add (const PatternPointer &pattern, ValueType *value, const IndexReference &ref) |
| |
| | PatternModel (IndexedCorpus *corpus=NULL) |
| |
| | PatternModel (std::istream *f, PatternModelOptions options, PatternModelInterface *constrainmodel=NULL, IndexedCorpus *corpus=NULL) |
| |
| | PatternModel (const std::string &filename, const PatternModelOptions &options, PatternModelInterface *constrainmodel=NULL, IndexedCorpus *corpus=NULL) |
| |
| | ~PatternModel () |
| |
| virtual size_t | size () const |
| |
| virtual bool | has (const Pattern &pattern) const |
| |
| virtual bool | has (const PatternPointer &pattern) const |
| |
| virtual void | load (std::string &filename, const PatternModelOptions &options, PatternModelInterface *constrainmodel=NULL) |
| |
| virtual void | load (std::istream *f, const PatternModelOptions &options, PatternModelInterface *constrainmodel=NULL) |
| |
| PatternModelInterface * | getinterface () |
| |
| virtual void | train (std::istream *in, PatternModelOptions options, PatternModelInterface *constrainbymodel=NULL, bool continued=false, uint32_t firstsentence=1, bool ignoreerrors=false) |
| |
| virtual void | train (const std::string &filename, PatternModelOptions options, PatternModelInterface *constrainbymodel=NULL, bool continued=false, uint32_t firstsentence=1, bool ignoreerrors=false) |
| |
| virtual int | computeskipgrams (const PatternPointer &pattern, int mintokens=2, const IndexReference *singleref=NULL, const IndexedData *multiplerefs=NULL, PatternModelInterface *constrainbymodel=NULL, std::vector< PatternPointer > *targetcontainer=NULL, const bool exhaustive=false, const int maxskips=3, const bool DEBUG=false) |
| |
| virtual int | computeskipgrams (const PatternPointer &pattern, PatternModelOptions &options, const IndexReference *singleref=NULL, const IndexedData *multiplerefs=NULL, PatternModelInterface *constrainbymodel=NULL, const bool exhaustive=false) |
| |
| virtual std::vector< PatternPointer > | findskipgrams (const PatternPointer &pattern, unsigned int occurrencethreshold=1, int maxskips=3) |
| |
| virtual void | trainskipgrams (const PatternModelOptions options, PatternModelInterface *constrainbymodel=NULL) |
| |
| void | test (MapType &target, std::istream *in) |
| |
| void | write (std::ostream *out) |
| |
| void | write (const std::string filename) |
| |
| virtual int | maxlength () const |
| |
| virtual int | minlength () const |
| |
| virtual unsigned int | occurrencecount (const Pattern &pattern) |
| |
| virtual unsigned int | occurrencecount (const PatternPointer &pattern) |
| |
| virtual ValueType * | getdata (const Pattern &pattern, bool makeifnew=false) |
| |
| virtual ValueType * | getdata (const PatternPointer &pattern, bool makeifnew=false) |
| |
| virtual unsigned int | types () |
| |
| virtual unsigned int | tokens () const |
| |
| unsigned char | type () const |
| |
| unsigned char | version () const |
| |
| void | output (std::ostream *) |
| |
| unsigned int | coveragecount (const Pattern &key) |
| |
| double | coverage (const Pattern &key) |
| |
| std::vector< PatternPointer > | getreverseindex (const IndexReference ref, int occurrencecount=0, int category=0, unsigned int size=0) |
| |
| std::vector< std::pair< IndexReference, PatternPointer > > | getreverseindex_bysentence (int sentence) |
| |
| std::vector< std::pair< IndexReference, PatternPointer > > | getreverseindex_right (const IndexReference ref) |
| |
| std::vector< std::pair< IndexReference, PatternPointer > > | getreverseindex_left (const IndexReference ref) |
| |
| void | computestats () |
| |
| virtual void | resetstats () |
| |
| virtual void | computecoveragestats (int category=0, int n=0) |
| |
| unsigned int | totaloccurrencesingroup (int category, int n) |
| |
| unsigned int | totalpatternsingroup (int category, int n) |
| |
| unsigned int | totalwordtypesingroup (int category, int n) |
| |
| unsigned int | totaltokensingroup (int category, int n) |
| |
| double | frequency (const Pattern &pattern) |
| |
| virtual void | add (const Pattern &pattern, ValueType *value, const IndexReference &ref) |
| |
| unsigned int | prune (int threshold, int _n=0) |
| |
| virtual unsigned int | pruneskipgrams (unsigned int threshold, int minskiptypes=2, int _n=0) |
| |
| unsigned int | prunenotinset (const std::unordered_set< Pattern > &s, int _n) |
| |
| unsigned int | prunebymodel (PatternModel< ValueType2, ValueHandler2, MapType2 > &secondmodel) |
| |
| std::vector< std::pair< Pattern, int > > | getpatterns (const Pattern &pattern) |
| |
| virtual void | print (std::ostream *out, ClassDecoder &decoder) |
| |
| virtual void | print (std::ostream *out, ClassDecoder &decoder, const PatternPointer &pattern, bool endline=true) |
| |
| virtual void | printreverseindex (std::ostream *out, ClassDecoder &decoder) |
| |
| void | printmodel (std::ostream *out, ClassDecoder &decoder) |
| |
| void | printpattern (std::ostream *out, ClassDecoder &decoder, const Pattern &pattern, bool endline=true) |
| |
| void | histogram (std::map< unsigned int, unsigned int > &hist, unsigned int threshold=0, unsigned int cap=0, int category=0, unsigned int size=0) |
| |
| void | histogram (std::ostream *OUT, unsigned int threshold=0, unsigned int cap=0, int category=0, unsigned int size=0) |
| |
| unsigned int | topthreshold (int amount, int category=0, int size=0) |
| |
| void | info (std::ostream *OUT) |
| |
| void | report (std::ostream *OUT) |
| |
| PatternSet< uint64_t > | extractset (int minlength=1, int maxlength=1) |
| |
| virtual void | outputrelations (const Pattern &pattern, ClassDecoder &classdecoder, std::ostream *OUT) |
| |
| virtual t_relationmap | getsubchildren (const Pattern &pattern, int=0, int=0, int=0) |
| |
| virtual t_relationmap | getsubparents (const Pattern &pattern, int=0, int=0, int=0) |
| |
| virtual t_relationmap | gettemplates (const Pattern &pattern, int=0) |
| |
| virtual t_relationmap | getinstances (const Pattern &pattern, int=0) |
| |
| virtual t_relationmap | getskipcontent (const PatternPointer &pattern) |
| |
| virtual t_relationmap | getleftneighbours (const Pattern &pattern, int=0, int=0, int=0, int=0) |
| |
| virtual t_relationmap | getrightneighbours (const Pattern &pattern, int=0, int=0, int=0, int=0) |
| |
| virtual t_relationmap_double | getnpmi (const Pattern &pattern, double threshold) |
| |
| virtual int | computeflexgrams_fromskipgrams () |
| |
| virtual int | computeflexgrams_fromcooc () |
| |
| virtual void | outputcooc_npmi (std::ostream *OUT, ClassDecoder &classdecoder, double threshold) |
| |
| virtual void | outputcooc (std::ostream *OUT, ClassDecoder &classdecoder, double threshold) |
| |
| virtual PatternStoreInterface * | getstoreinterface () |
| |
| virtual PatternStoreInterface * | getstoreinterface () |
| |
|
| typedef MapType::iterator | iterator |
| |
| typedef MapType::const_iterator | const_iterator |
| |
| IndexedCorpus * | reverseindex |
| | Pointer to the reverse index and corpus data for this model (or NULL). More...
|
| |
| bool | reverseindex_internal |
| |
| bool | hasskipgrams |
| | Does this model have skipgrams? More...
|
| |
| virtual void | postread (const PatternModelOptions options) |
| |
| virtual void | posttrain (const PatternModelOptions options) |
| |
| unsigned char | model_type |
| |
| unsigned char | model_version |
| |
| uint64_t | totaltokens |
| | Total number of tokens in the original corpus, so this INCLUDES TOKENS NOT COVERED BY THE MODEL! More...
|
| |
| uint64_t | totaltypes |
| | Total number of unigram/word types in the original corpus, so this INCLUDES TYPES NOT COVERED BY THE MODEL! More...
|
| |
| int | maxn |
| |
| int | minn |
| |
| std::set< int > | cache_categories |
| |
| std::set< int > | cache_n |
| |
| std::map< int, std::map< int, unsigned int > > | cache_grouptotal |
| | total occurrences within a group (used for frequency computation) More...
|
| |
| std::map< int, std::map< int, unsigned int > > | cache_grouptotalpatterns |
| | total distinct patterns per group More...
|
| |
| std::map< int, std::map< int, unsigned int > > | cache_grouptotalwordtypes |
| | total covered word types per group More...
|
| |
| std::map< int, std::map< int, unsigned int > > | cache_grouptotaltokens |
| | total covered tokens per group More...
|
| |
| std::map< int, std::vector< uint32_t > > | gapmasks |
| | pre-computed masks representing possible gap configurations for various pattern lengths More...
|
| |