7 template<
class FeatureType>
27 if (n > maxn) maxn = n;
28 if (n < minn) minn = n;
47 this->
load(f,options,constrainmodel);
57 if (!options.QUIET) std::cerr <<
"Loading " << filename << std::endl;
58 std::ifstream * in =
new std::ifstream(filename.c_str());
60 std::cerr <<
"ERROR: Unable to load file " << filename << std::endl;
63 this->
load( (std::istream *) in, options, constrainmodel);
71 virtual size_t size()
const {
82 if (!options.
QUIET) std::cerr <<
"Loading " << filename << std::endl;
83 std::ifstream * in =
new std::ifstream(filename.c_str());
85 std::cerr <<
"ERROR: Unable to load file " << filename << std::endl;
88 this->
load( (std::istream *) in, options, constrainmodel);
97 f->read( (
char*) &null,
sizeof(
char));
98 f->read( (
char*) &model_type,
sizeof(
char));
99 f->read( (
char*) &model_version,
sizeof(
char));
102 std::cerr <<
"File is not a colibri alignment model file (did you try to load a different type of pattern model?)" << std::endl;
105 if (model_version > 2) {
106 std::cerr <<
"WARNING: Model is created with a newer version of Colibri Core! Attempting to continue but failure is likely..." << std::endl;
108 f->read( (
char*) &totaltokens,
sizeof(uint64_t));
109 f->read( (
char*) &totaltypes,
sizeof(uint64_t));
112 std::cerr <<
"Debug enabled, loading Alignment Model type " << (int) model_type <<
", version " << (
int) model_version << std::endl;
113 std::cerr <<
"Total tokens: " << totaltokens <<
", total types: " << totaltypes << std::endl;;
119 PatternMap<PatternFeatureVectorMap<FeatureType>,
PatternFeatureVectorMapHandler<FeatureType>>::template
read(f, options.
MINTOKENS,options.
MINLENGTH, options.
MAXLENGTH, constrainstore, !options.
DOREMOVENGRAMS, !options.
DOREMOVESKIPGRAMS, !options.
DOREMOVEFLEXGRAMS, options.
DORESET, options.
DEBUG);
120 if (options.
DEBUG) std::cerr <<
"Read " << this->
size() <<
" patterns" << std::endl;
130 out->write( (
char*) &null,
sizeof(
char));
132 out->write( (
char*) &t,
sizeof(
char));
134 out->write( (
char*) &v,
sizeof(
char));
135 out->write( (
char*) &totaltokens,
sizeof(uint64_t));
136 out->write( (
char*) &totaltypes,
sizeof(uint64_t));
140 void write(
const std::string filename) {
141 std::ofstream * out =
new std::ofstream(filename.c_str());
160 if (iter != this->
end()) {
161 return &(iter->second);
162 }
else if (makeifnew) {
163 return &((*this)[pattern]);
172 if (iter != this->
end()) {
173 return &(iter->second);
174 }
else if (makeifnew) {
175 return &((*this)[pattern]);
191 return (this->
has(pattern) && this->
getdata(pattern)->
has(pattern2));
195 return (this->
has(patternpointer) && this->
getdata(patternpointer)->
has(pattern2));
200 if (fvmap == NULL)
return NULL;
201 return fvmap->
getdata(pattern2);
204 void add(
const Pattern & pattern,
const Pattern & pattern2, std::vector<FeatureType> & features,
bool checkifexists=
true) {
207 fv =
getdata(pattern,pattern2,
true);
212 fvm->
insert(pfv, checkifexists);
215 for (
typename std::vector<FeatureType>::iterator iter = features.begin(); iter != features.end(); iter++) {
222 print(out,sourcedecoder, targetdecoder);
226 *out <<
"PATTERN\tPATTERN2\tFEATURES" << std::endl;
227 for (iterator iter = this->
begin(); iter != this->
end(); iter++) {
228 const Pattern sourcepattern = iter->first;
232 *out << sourcepattern.
tostring(sourcedecoder) <<
"\t" << targetpattern.
tostring(targetdecoder);
233 for (
typename std::vector<FeatureType>::iterator iter3 = pfv->
data.begin(); iter3 != pfv->
data.end(); iter3++) {
234 *out <<
"\t" << *iter3;
virtual PatternFeatureVectorMap< FeatureType > * getdata(const Pattern &pattern, bool makeifnew=false)
Definition: alignmodel.h:158
int MAXLENGTH
The maximum length of patterns to be loaded/extracted, inclusive (in words/tokens) (default: 100) ...
Definition: patternmodel.h:126
void read(std::istream *in, int MINTOKENS=0, int MINLENGTH=0, int MAXLENGTH=999999, PatternStoreInterface *constrainstore=NULL, bool DONGRAMS=true, bool DOSKIPGRAMS=true, bool DOFLEXGRAMS=true, bool DORESET=false, bool DEBUG=false)
Definition: patternstore.h:457
virtual bool has(const Pattern &pattern) const
Definition: alignmodel.h:74
Definition: datatypes.h:477
virtual size_t size() const
Definition: alignmodel.h:71
virtual PatternStoreInterface * getstoreinterface()
Definition: patternstore.h:381
unsigned char model_type
Definition: alignmodel.h:12
iterator find(const Pattern &pattern)
Definition: patternstore.h:816
std::string tostring(const ClassDecoder &classdecoder) const
Definition: pattern.cpp:278
void push_back(FeatureType &f)
Definition: datatypes.h:339
virtual PatternFeatureVectorMap< FeatureType > * getdata(const PatternPointer &patternpointer, bool makeifnew=false)
Definition: alignmodel.h:169
int MINTOKENS
Definition: patternmodel.h:113
Definition: pattern.h:357
bool DOREMOVESKIPGRAMS
Remove skip-grams from the model upon loading it.
Definition: patternmodel.h:146
void write(std::ostream *out)
Definition: alignmodel.h:128
Pattern class, represents a pattern (ngram, skipgram or flexgram). Encoded in a memory-saving fashion...
Definition: pattern.h:75
PatternMap< PatternFeatureVectorMap< FeatureType >, PatternFeatureVectorMapHandler< FeatureType > >::const_iterator const_iterator
Definition: alignmodel.h:148
iterator end()
Definition: patternstore.h:813
std::vector< PatternFeatureVector< FeatureType > * >::iterator iterator
Definition: datatypes.h:359
virtual bool has(const PatternPointer &patternpointer, const PatternPointer &patternpointer2)
Definition: alignmodel.h:193
unsigned char type() const
Definition: alignmodel.h:185
bool DORESET
sets all counts to zero upon loading, clears indices
Definition: patternmodel.h:148
virtual void postread(const PatternModelOptions options)
Definition: alignmodel.h:21
unsigned char version() const
Definition: alignmodel.h:186
virtual bool has(const Pattern &pattern, const Pattern &pattern2)
Definition: alignmodel.h:190
Basic read-only interface for pattern models, abstract base class.
Definition: interface.h:39
virtual void load(std::string filename, const PatternModelOptions options, PatternModelInterface *constrainmodel=NULL)
Definition: alignmodel.h:81
Limited virtual interface to pattern stores.
Definition: interface.h:20
Definition: alignmodel.h:8
virtual int getmodelversion() const
Definition: alignmodel.h:69
virtual void load(std::istream *f, PatternModelOptions options, PatternModelInterface *constrainmodel=NULL)
Definition: alignmodel.h:93
bool DEBUG
Output extra debug information.
Definition: patternmodel.h:151
bool DOREMOVEFLEXGRAMS
Remove flexgrams from the model upon loading it.
Definition: patternmodel.h:147
Class for decoding binary class-encoded data back to plain-text. The ClassDecoder maintains a mapping...
Definition: classdecoder.h:43
void write(const std::string filename)
Definition: alignmodel.h:140
virtual bool has(const PatternPointer &pattern) const
Definition: alignmodel.h:77
virtual int getmodeltype() const
Definition: alignmodel.h:68
virtual void printmodel(std::ostream *out, ClassDecoder &sourcedecoder, ClassDecoder &targetdecoder)
Definition: alignmodel.h:221
A pattern map storing patterns and their values in a hash map (unordered_map).
Definition: patternstore.h:782
virtual int occurrencecount(const Pattern &pattern)
Definition: alignmodel.h:154
unsigned char classencodingversion
Definition: patternstore.h:328
virtual void print(std::ostream *out, ClassDecoder &sourcedecoder, ClassDecoder &targetdecoder)
Definition: alignmodel.h:225
Contains classes for Pattern Models.
virtual int minlength() const
Definition: alignmodel.h:152
void add(const Pattern &pattern, const Pattern &pattern2, std::vector< FeatureType > &features, bool checkifexists=true)
Definition: alignmodel.h:204
Pattern pattern
Definition: datatypes.h:272
Definition: patternmodel.h:78
PatternMap< PatternFeatureVectorMap< FeatureType >, PatternFeatureVectorMapHandler< FeatureType > >::iterator iterator
Definition: alignmodel.h:147
void insert(PatternFeatureVector< FeatureType > *pfv, bool checkexists=true)
Definition: datatypes.h:410
bool QUIET
Don't output to stderr.
Definition: patternmodel.h:150
Options for Pattern Model loading and training.
Definition: patternmodel.h:111
int types() const
Definition: alignmodel.h:182
int maxn
Definition: alignmodel.h:17
std::vector< FeatureType > data
Definition: datatypes.h:273
iterator begin()
Definition: patternstore.h:810
uint64_t totaltokens
Definition: alignmodel.h:14
virtual PatternFeatureVector< FeatureType > * getdata(const Pattern &pattern, const Pattern &pattern2, bool makeifnew=false)
Definition: alignmodel.h:198
bool DOREMOVENGRAMS
Remove n-grams from the model upon loading it.
Definition: patternmodel.h:145
int minn
Definition: alignmodel.h:18
const size_t n() const
Definition: pattern.cpp:89
uint64_t totaltypes
Definition: alignmodel.h:15
void clear()
Definition: datatypes.h:336
int MINLENGTH
The minimum length of patterns to be loaded/extracted (in words/tokens) (default: 1) ...
Definition: patternmodel.h:125
Definition: datatypes.h:270
int tokens() const
Definition: alignmodel.h:183
virtual int maxlength() const
Definition: alignmodel.h:151
PatternModelInterface * getinterface()
Definition: alignmodel.h:124
Definition: datatypes.h:352
unsigned char model_version
Definition: alignmodel.h:13
virtual PatternFeatureVector< FeatureType > * getdata(const Pattern &pattern)
Definition: datatypes.h:457