Colibri Core
Public Member Functions | Public Attributes | Static Public Attributes | List of all members
PatternPointer Class Reference

#include <pattern.h>

Public Member Functions

 PatternPointer ()
 
 PatternPointer (unsigned char *dataref, const unsigned int bytesize)
 
 PatternPointer (const Pattern &ref)
 
 PatternPointer (const Pattern *ref)
 
 PatternPointer (const PatternPointer &ref)
 
 PatternPointer (const PatternPointer *ref)
 
PatternPointer & operator= (const PatternPointer &other)
 
 PatternPointer (std::istream *in, bool ignoreeol=false, const unsigned char version=2, unsigned char *corpusstart=NULL, bool debug=false)
 
void write (std::ostream *out, const unsigned char *corpusstart=NULL) const
 
 PatternPointer (unsigned char *, unsigned int, unsigned int)
 
 PatternPointer (const PatternPointer &, unsigned int, unsigned int)
 
 PatternPointer (const Pattern &, unsigned int, unsigned int)
 
uint32_t computemask () const
 
const size_t n () const
 
const size_t bytesize () const
 
const size_t size () const
 
const size_t hash () const
 
const PatternCategory category () const
 
const bool isskipgram () const
 
const bool isflexgram () const
 
std::string tostring (const ClassDecoder &classdecoder) const
 
std::string decode (const ClassDecoder &classdecoder) const
 
bool out () const
 
bool operator== (const PatternPointer &other) const
 
bool operator!= (const PatternPointer &other) const
 
bool operator== (const Pattern &other) const
 
bool operator!= (const Pattern &other) const
 
PatternPointer toflexgram () const
 
bool isgap (int i) const
 
PatternPointer & operator++ ()
 
bool operator< (const PatternPointer &other) const
 
int ngrams (std::vector< PatternPointer > &container, const int n) const
 
int subngrams (std::vector< PatternPointer > &container, int minn=1, int maxn=9) const
 
int ngrams (std::vector< std::pair< PatternPointer, int >> &container, const int n) const
 
int subngrams (std::vector< std::pair< PatternPointer, int >> &container, int minn=1, int maxn=9) const
 
int parts (std::vector< PatternPointer > &container) const
 
int parts (std::vector< std::pair< int, int > > &container) const
 
int gaps (std::vector< std::pair< int, int > > &container) const
 
const unsigned int skipcount () const
 
PatternPointer addskip (const std::pair< int, int > &gap) const
 
PatternPointer addskips (const std::vector< std::pair< int, int > > &gaps) const
 
int flexcollapse (unsigned char *collapseddata) const
 
bool instanceof (const Pattern &skipgram) const
 
 operator Pattern ()
 
Pattern pattern () const
 

Public Attributes

unsigned char * data
 
uint32_t bytes
 
uint32_t mask
 

Static Public Attributes

static const int patterntype = PATTERNPOINTER
 

Constructor & Destructor Documentation

PatternPointer::PatternPointer ( )
inline
PatternPointer::PatternPointer ( unsigned char *  dataref,
const unsigned int  bytesize 
)
inline
PatternPointer::PatternPointer ( const Pattern &  ref)
inline
PatternPointer::PatternPointer ( const Pattern *  ref)
inline
PatternPointer::PatternPointer ( const PatternPointer &  ref)
inline
PatternPointer::PatternPointer ( const PatternPointer *  ref)
inline
PatternPointer::PatternPointer ( std::istream *  in,
bool  ignoreeol = false,
const unsigned char  version = 2,
unsigned char *  corpusstart = NULL,
bool  debug = false 
)
PatternPointer::PatternPointer ( unsigned char *  ref,
unsigned int  begin,
unsigned int  length 
)
PatternPointer::PatternPointer ( const PatternPointer &  ref,
unsigned int  begin,
unsigned int  length 
)
PatternPointer::PatternPointer ( const Pattern & ,
unsigned int ,
unsigned int 
)

Member Function Documentation

PatternPointer PatternPointer::addskip ( const std::pair< int, int > &  gap) const

Replaces a series of tokens with a skip/gap of a particular size. Effectively turns a pattern into a skipgram.

Parameters
gap: The position and size of the skip/gap: a pair consisting of a begin index (0-indexed) and a length, i.e. the size of the skip
PatternPointer PatternPointer::addskips ( const std::vector< std::pair< int, int > > &  gaps) const

Replaces multiple series of tokens with skips/gaps of particular sizes. Effectively turns a pattern into a skipgram.

Parameters
gaps: The positions and sizes of the gaps: a vector of pairs, each pair consisting of a begin index (0-indexed) and a length, indicating where to place the gap
Returns
A skipgram
const size_t PatternPointer::bytesize ( ) const
inline
const PatternCategory PatternPointer::category ( ) const
uint32_t PatternPointer::computemask ( ) const
std::string PatternPointer::decode ( const ClassDecoder &  classdecoder) const
inline
int PatternPointer::flexcollapse ( unsigned char *  collapseddata) const

Low-level function for flexgrams, that returns a collapsed comparable representation of the flexgram in collapseddata (has to be pre-allocated). Return value is the number of bytes of the representation. In the collapsed representation adjacent flexgrams are removed.

int PatternPointer::gaps ( std::vector< std::pair< int, int > > &  container) const

Finds all the gaps of a skipgram or flexgram and adds them to container as (begin, length) pairs. Gaps are the skipped portions between the parts (the portions that are not skips); thus 'to be {*} not {*} be' has three parts and two gaps. The gap length of a flexgram will always be its minimum length, one.

const size_t PatternPointer::hash ( ) const

Compute a hash value for this pattern

bool PatternPointer::instanceof ( const Pattern &  skipgram) const
const bool PatternPointer::isflexgram ( ) const
inline
bool PatternPointer::isgap ( int  i) const
const bool PatternPointer::isskipgram ( ) const
inline
const size_t PatternPointer::n ( ) const
int PatternPointer::ngrams ( std::vector< PatternPointer > &  container,
const int  n 
) const
int PatternPointer::ngrams ( std::vector< std::pair< PatternPointer, int >> &  container,
const int  n 
) const
PatternPointer::operator Pattern ( )
inline
bool PatternPointer::operator!= ( const PatternPointer &  other) const
inline
bool PatternPointer::operator!= ( const Pattern &  other) const
inline
PatternPointer & PatternPointer::operator++ ( )

Return a new PatternPointer one token to the right, maintaining the same token length and the same skip configuration (if any). Note that this will cause segmentation faults if the new PatternPointer exceeds the original data; it is up to the caller to check this.

bool PatternPointer::operator< ( const PatternPointer &  other) const
inline
PatternPointer& PatternPointer::operator= ( const PatternPointer &  other)
inline
bool PatternPointer::operator== ( const PatternPointer &  other) const
bool PatternPointer::operator== ( const Pattern &  other) const
bool PatternPointer::out ( ) const
int PatternPointer::parts ( std::vector< PatternPointer > &  container) const

Finds all the parts of a skipgram and adds them to container. Parts are the portions that are not skips; thus 'to be {*} not {*} be' has three parts.

int PatternPointer::parts ( std::vector< std::pair< int, int > > &  container) const

Finds all the parts of a skipgram and adds them to container as (begin, length) pairs. Parts are the portions that are not skips; thus 'to be {*} not {*} be' has three parts.

Pattern PatternPointer::pattern ( ) const
inline
const size_t PatternPointer::size ( ) const
inline
const unsigned int PatternPointer::skipcount ( ) const

Returns the number of skips in this pattern.

int PatternPointer::subngrams ( std::vector< PatternPointer > &  container,
int  minn = 1,
int  maxn = 9 
) const
int PatternPointer::subngrams ( std::vector< std::pair< PatternPointer, int >> &  container,
int  minn = 1,
int  maxn = 9 
) const
PatternPointer PatternPointer::toflexgram ( ) const
std::string PatternPointer::tostring ( const ClassDecoder &  classdecoder) const
void PatternPointer::write ( std::ostream *  out,
const unsigned char *  corpusstart = NULL 
) const

Write Pattern to output stream (in binary form)

Parameters
out: The output stream

Member Data Documentation

uint32_t PatternPointer::bytes

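Pointer to Pattern data (note: this description appears to be attached to the wrong member; it presumably documents the `data` pointer, while `bytes` is a uint32_t — confirm against pattern.h)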

unsigned char* PatternPointer::data
uint32_t PatternPointer::mask
const int PatternPointer::patterntype = PATTERNPOINTER
static

The documentation for this class was generated from the following files: