java.io.Serializable
, CapabilitiesHandler
, CapabilitiesIgnorer
, CommandlineRunnable
, OptionHandler
, RevisionHandler
public abstract class DistantSupervisionSyntheticFilter extends TweetToFeatureVector
Modifier and Type | Field | Description |
---|---|---|
static java.lang.String |
CLUSTPREFIX |
The prefix for cluster-based attributes.
|
static java.lang.String |
LEXICON_FOLDER_NAME |
Default path to where lexicons are stored.
|
static java.lang.String |
RESOURCES_FOLDER_NAME |
Default path to where resources are stored.
|
static java.lang.String |
UNIPREFIX |
The prefix for unigram attributes.
|
Constructor | Description |
---|---|
DistantSupervisionSyntheticFilter() |
Modifier and Type | Method | Description |
---|---|---|
it.unimi.dsi.fastutil.objects.Object2IntMap<java.lang.String> |
calculateDocVec(java.util.List<java.lang.String> tokens) |
Calculates tweet vectors from a list of tokens
|
java.io.File |
getLexicon() |
|
int |
getMinAttDocs() |
|
java.lang.String |
getPolarityAttName() |
|
java.lang.String |
getPolarityAttNegValName() |
|
java.lang.String |
getPolarityAttPosValName() |
|
int |
getRandomSeed() |
|
java.io.File |
getWordClustFile() |
|
boolean |
isCreateClustAtts() |
|
boolean |
isCreateWordAtts() |
|
Instances |
mapTargetInstance(Instances inp) |
Maps tweets from the second batch into instances that are compatible with the ones generated from the first batch.
|
void |
setCreateClustAtts(boolean createClustAtts) |
|
void |
setCreateWordAtts(boolean createWordAtts) |
|
void |
setLexicon(java.io.File lexicon) |
|
void |
setMinAttDocs(int minAttDocs) |
|
void |
setPolarityAttName(java.lang.String polarityAttName) |
|
void |
setPolarityAttNegValName(java.lang.String polarityAttNegValName) |
|
void |
setPolarityAttPosValName(java.lang.String polarityAttPosValName) |
|
void |
setRandomSeed(int randomSeed) |
|
void |
setWordClustFile(java.io.File wordClustFile) |
batchFilterFile, debugTipText, doNotCheckCapabilitiesTipText, filterFile, getCapabilities, getCopyOfInputFormat, getDebug, getDoNotCheckCapabilities, getOutputFormat, getRevision, isFirstBatchDone, isNewBatch, isOutputFormatDefined, main, makeCopies, makeCopy, mayRemoveInstanceAfterFirstBatchDone, numPendingOutput, output, outputPeek, postExecution, preExecution, run, runFilter, setDebug, setDoNotCheckCapabilities, toString, useFilter, wekaStaticWrapper
equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
batchFinished, input
globalInfo, setInputFormat
allowAccessToFullInputFormat, getCapabilities, getOptions, getStemmer, getStopwordsHandler, getTextIndex, getTokenizer, isReduceRepeatedLetters, isStandarizeUrlsUsers, isToLowerCase, listOptions, setOptions, setReduceRepeatedLetters, setStandarizeUrlsUsers, setStemmer, setStopwordsHandler, setTextIndex, setTokenizer, setToLowerCase
public static java.lang.String RESOURCES_FOLDER_NAME
public static java.lang.String LEXICON_FOLDER_NAME
public static java.lang.String UNIPREFIX
public static java.lang.String CLUSTPREFIX
public Instances mapTargetInstance(Instances inp)
inp
- input Instances
public it.unimi.dsi.fastutil.objects.Object2IntMap<java.lang.String> calculateDocVec(java.util.List<java.lang.String> tokens)
tokens
- a tokenized tweet
@OptionMetadata(displayName="minAttDocs", description="Minimum frequency of a sparse attribute to be considered in the attribute space.", commandLineParamName="M", commandLineParamSynopsis="-M <int>", displayOrder=6) public int getMinAttDocs()
public void setMinAttDocs(int minAttDocs)
@OptionMetadata(displayName="createWordAtts", description="True for creating unigram attributes.", commandLineParamIsFlag=true, commandLineParamName="W", commandLineParamSynopsis="-W", displayOrder=7) public boolean isCreateWordAtts()
public void setCreateWordAtts(boolean createWordAtts)
@OptionMetadata(displayName="createClustAtts", description="True for creating attributes using word clusters", commandLineParamIsFlag=true, commandLineParamName="C", commandLineParamSynopsis="-C", displayOrder=8) public void setCreateClustAtts(boolean createClustAtts)
public boolean isCreateClustAtts()
@OptionMetadata(displayName="wordClustFile", description="The file containing the word clusters.", commandLineParamName="H", commandLineParamSynopsis="-H <string>", displayOrder=9) public java.io.File getWordClustFile()
public void setWordClustFile(java.io.File wordClustFile)
@OptionMetadata(displayName="lexicon", description="The file containing a lexicon in ARFF format with word polarities.", commandLineParamName="lex", commandLineParamSynopsis="-lex <string>", displayOrder=10) public java.io.File getLexicon()
public void setLexicon(java.io.File lexicon)
@OptionMetadata(displayName="randomseed", description="The random seed number. \t default: 1", commandLineParamName="R", commandLineParamSynopsis="-R <int>", displayOrder=11) public int getRandomSeed()
public void setRandomSeed(int randomSeed)
@OptionMetadata(displayName="polarityAttName", description="The lexicon attribute name with the word polarities. \t default: polarity", commandLineParamName="polatt", commandLineParamSynopsis="-polatt <string>", displayOrder=12) public java.lang.String getPolarityAttName()
public void setPolarityAttName(java.lang.String polarityAttName)
@OptionMetadata(displayName="polarityAttPosValName", description="The lexicon attribute value name for positive words. \t default: positive", commandLineParamName="posval", commandLineParamSynopsis="-posval <String>", displayOrder=17) public java.lang.String getPolarityAttPosValName()
public void setPolarityAttPosValName(java.lang.String polarityAttPosValName)
@OptionMetadata(displayName="polarityAttNegValName", description="The lexicon attribute value name for negative words. \t default: negative", commandLineParamName="negval", commandLineParamSynopsis="-negval <String>", displayOrder=18) public java.lang.String getPolarityAttNegValName()
public void setPolarityAttNegValName(java.lang.String polarityAttNegValName)