Colibri Core
|
#include <pattern.h>
Public Member Functions | |
PatternPointer () | |
PatternPointer (unsigned char *dataref, const unsigned int bytesize) | |
PatternPointer (const Pattern &ref) | |
PatternPointer (const Pattern *ref) | |
PatternPointer (const PatternPointer &ref) | |
PatternPointer (const PatternPointer *ref) | |
PatternPointer & | operator= (const PatternPointer &other) |
PatternPointer (std::istream *in, bool ignoreeol=false, const unsigned char version=2, unsigned char *corpusstart=NULL, bool debug=false) | |
void | write (std::ostream *out, const unsigned char *corpusstart=NULL) const |
PatternPointer (unsigned char *, unsigned int, unsigned int) | |
PatternPointer (const PatternPointer &, unsigned int, unsigned int) | |
PatternPointer (const Pattern &, unsigned int, unsigned int) | |
uint32_t | computemask () const |
const size_t | n () const |
const size_t | bytesize () const |
const size_t | size () const |
const size_t | hash () const |
const PatternCategory | category () const |
const bool | isskipgram () const |
const bool | isflexgram () const |
std::string | tostring (const ClassDecoder &classdecoder) const |
std::string | decode (const ClassDecoder &classdecoder) const |
bool | out () const |
bool | operator== (const PatternPointer &other) const |
bool | operator!= (const PatternPointer &other) const |
bool | operator== (const Pattern &other) const |
bool | operator!= (const Pattern &other) const |
PatternPointer | toflexgram () const |
bool | isgap (int i) const |
PatternPointer & | operator++ () |
bool | operator< (const PatternPointer &other) const |
int | ngrams (std::vector< PatternPointer > &container, const int n) const |
int | subngrams (std::vector< PatternPointer > &container, int minn=1, int maxn=9) const |
int | ngrams (std::vector< std::pair< PatternPointer, int >> &container, const int n) const |
int | subngrams (std::vector< std::pair< PatternPointer, int >> &container, int minn=1, int maxn=9) const |
int | parts (std::vector< PatternPointer > &container) const |
int | parts (std::vector< std::pair< int, int > > &container) const |
int | gaps (std::vector< std::pair< int, int > > &container) const |
const unsigned int | skipcount () const |
PatternPointer | addskip (const std::pair< int, int > &gap) const |
PatternPointer | addskips (const std::vector< std::pair< int, int > > &gaps) const |
int | flexcollapse (unsigned char *collapseddata) const |
bool | instanceof (const Pattern &skipgram) const |
operator Pattern () | |
Pattern | pattern () const |
Public Attributes | |
unsigned char * | data |
uint32_t | bytes |
uint32_t | mask |
Static Public Attributes | |
static const int | patterntype = PATTERNPOINTER |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
PatternPointer::PatternPointer | ( | std::istream * | in, |
bool | ignoreeol = false , |
||
const unsigned char | version = 2 , |
||
unsigned char * | corpusstart = NULL , |
||
bool | debug = false |
||
) |
PatternPointer::PatternPointer | ( | unsigned char * | ref, |
unsigned int | begin, | ||
unsigned int | length | ||
) |
PatternPointer::PatternPointer | ( | const PatternPointer & | ref, |
unsigned int | begin, | ||
unsigned int | length | ||
) |
PatternPointer::PatternPointer | ( | const Pattern & | , |
unsigned | int, | ||
unsigned | int | ||
) |
PatternPointer PatternPointer::addskip | ( | const std::pair< int, int > & | gap | ) | const |
Replaces a series of tokens with a skip/gap of a particular size. Effectively turns a pattern into a skipgram.
gap | The position and size of the skip/gap: a pair consisting of a begin index (0-indexed) and a length, i.e. the size of the skip |
PatternPointer PatternPointer::addskips | ( | const std::vector< std::pair< int, int > > & | gaps | ) | const |
Replaces multiple series of tokens with skips/gaps of particular sizes. Effectively turns a pattern into a skipgram.
gaps | The positions and sizes of the gaps: a vector of pairs, each pair consisting of a begin index (0-indexed) and a length, indicating where to place the gap |
|
inline |
const PatternCategory PatternPointer::category | ( | ) | const |
uint32_t PatternPointer::computemask | ( | ) | const |
|
inline |
int PatternPointer::flexcollapse | ( | unsigned char * | collapseddata | ) | const |
Low-level function for flexgrams that returns a collapsed, comparable representation of the flexgram in collapseddata (which has to be pre-allocated). The return value is the number of bytes of the representation. In the collapsed representation, adjacent flexgrams are removed.
int PatternPointer::gaps | ( | std::vector< std::pair< int, int > > & | container | ) | const |
Finds all the gaps of a skipgram or flexgram and adds them to container as (begin, length) pairs. Parts are the portions that are not skips; gaps are the skips between them. Thus 'to be {*} not {*} be' has three parts and two gaps. The gap length of a flexgram will always be its minimum length, one.
const size_t PatternPointer::hash | ( | ) | const |
Compute a hash value for this pattern
bool PatternPointer::instanceof | ( | const Pattern & | skipgram | ) | const |
|
inline |
bool PatternPointer::isgap | ( | int | i | ) | const |
|
inline |
const size_t PatternPointer::n | ( | ) | const |
int PatternPointer::ngrams | ( | std::vector< PatternPointer > & | container, |
const int | n | ||
) | const |
int PatternPointer::ngrams | ( | std::vector< std::pair< PatternPointer, int >> & | container, |
const int | n | ||
) | const |
|
inline |
|
inline |
|
inline |
PatternPointer & PatternPointer::operator++ | ( | ) |
Returns a new PatternPointer one token to the right, maintaining the same token length and the same skip configuration (if any). Note that this will cause segmentation faults if the new PatternPointer exceeds the original data! It is up to the caller to check this.
|
inline |
|
inline |
bool PatternPointer::operator== | ( | const PatternPointer & | other | ) | const |
bool PatternPointer::operator== | ( | const Pattern & | other | ) | const |
bool PatternPointer::out | ( | ) | const |
int PatternPointer::parts | ( | std::vector< PatternPointer > & | container | ) | const |
Finds all the parts of a skipgram and adds them to container. Parts are the portions that are not skips; thus 'to be {*} not {*} be' has three parts.
int PatternPointer::parts | ( | std::vector< std::pair< int, int > > & | container | ) | const |
Finds all the parts of a skipgram and adds them to container as (begin, length) pairs. Parts are the portions that are not skips; thus 'to be {*} not {*} be' has three parts.
|
inline |
|
inline |
const unsigned int PatternPointer::skipcount | ( | ) | const |
Returns the number of skips in this pattern.
int PatternPointer::subngrams | ( | std::vector< PatternPointer > & | container, |
int | minn = 1 , |
||
int | maxn = 9 |
||
) | const |
int PatternPointer::subngrams | ( | std::vector< std::pair< PatternPointer, int >> & | container, |
int | minn = 1 , |
||
int | maxn = 9 |
||
) | const |
PatternPointer PatternPointer::toflexgram | ( | ) | const |
std::string PatternPointer::tostring | ( | const ClassDecoder & | classdecoder | ) | const |
void PatternPointer::write | ( | std::ostream * | out, |
const unsigned char * | corpusstart = NULL |
||
) | const |
Write Pattern to output stream (in binary form)
out | The output stream |
uint32_t PatternPointer::bytes |
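Pointer to Pattern data. (Note: this description appears to belong to the data member listed next, not to bytes, which is a uint32_t rather than a pointer.)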
unsigned char* PatternPointer::data |
uint32_t PatternPointer::mask |
|
static |