All Implemented Interfaces:
java.io.Serializable
, CapabilitiesHandler
, CapabilitiesIgnorer
, CommandlineRunnable
, OptionHandler
, RevisionHandler
Direct Known Subclasses:
DistantSupervisionSyntheticFilter
, PMILexiconExpander
, TweetCentroid
, TweetToEmbeddingsFeatureVector
, TweetToInputLexiconFeatureVector
, TweetToLexiconFeatureVector
, TweetToSentiStrengthFeatureVector
, TweetToSparseFeatureVector
, TweetToWordListCountFeatureVector
public abstract class TweetToFeatureVector extends SimpleBatchFilter
Constructor | Description |
---|---|
TweetToFeatureVector() |
Modifier and Type | Method | Description |
---|---|---|
boolean |
allowAccessToFullInputFormat() |
|
Capabilities |
getCapabilities() |
|
java.lang.String[] |
getOptions() |
|
Stemmer |
getStemmer() |
|
StopwordsHandler |
getStopwordsHandler() |
|
java.lang.String |
getTextIndex() |
|
Tokenizer |
getTokenizer() |
|
boolean |
isReduceRepeatedLetters() |
|
boolean |
isStandarizeUrlsUsers() |
|
boolean |
isToLowerCase() |
|
java.util.Enumeration<Option> |
listOptions() |
|
void |
setOptions(java.lang.String[] options) |
|
void |
setReduceRepeatedLetters(boolean reduceRepeatedLetters) |
|
void |
setStandarizeUrlsUsers(boolean standarizeUrlsUsers) |
|
void |
setStemmer(Stemmer m_stemmer) |
|
void |
setStopwordsHandler(StopwordsHandler m_stopwordsHandler) |
|
void |
setTextIndex(java.lang.String textIndex) |
|
void |
setTokenizer(Tokenizer m_tokenizer) |
|
void |
setToLowerCase(boolean toLowerCase) |
Methods inherited from class Filter:
batchFilterFile, debugTipText, doNotCheckCapabilitiesTipText, filterFile, getCapabilities, getCopyOfInputFormat, getDebug, getDoNotCheckCapabilities, getOutputFormat, getRevision, isFirstBatchDone, isNewBatch, isOutputFormatDefined, main, makeCopies, makeCopy, mayRemoveInstanceAfterFirstBatchDone, numPendingOutput, output, outputPeek, postExecution, preExecution, run, runFilter, setDebug, setDoNotCheckCapabilities, toString, useFilter, wekaStaticWrapper
Methods inherited from class java.lang.Object:
equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
Methods inherited from class SimpleBatchFilter:
batchFinished, input
Methods inherited from class SimpleFilter:
globalInfo, setInputFormat
public java.util.Enumeration<Option> listOptions()
Specified by:
listOptions in interface OptionHandler
Overrides:
listOptions in class Filter
public java.lang.String[] getOptions()
Specified by:
getOptions in interface OptionHandler
Overrides:
getOptions in class Filter
public void setOptions(java.lang.String[] options) throws java.lang.Exception
Specified by:
setOptions in interface OptionHandler
Overrides:
setOptions in class Filter
Throws:
java.lang.Exception
public Capabilities getCapabilities()
Specified by:
getCapabilities in interface CapabilitiesHandler
Overrides:
getCapabilities in class Filter
public boolean allowAccessToFullInputFormat()
Overrides:
allowAccessToFullInputFormat in class SimpleBatchFilter
@OptionMetadata(displayName="textIndex", description="The index (starting from 1) of the target string attribute. First and last are valid values. ", commandLineParamName="I", commandLineParamSynopsis="-I <col>", displayOrder=0) public java.lang.String getTextIndex()
public void setTextIndex(java.lang.String textIndex)
@OptionMetadata(displayName="lowercase", description="Lowercase the tweet's content.", commandLineParamIsFlag=true, commandLineParamName="U", commandLineParamSynopsis="-U", displayOrder=1) public boolean isToLowerCase()
public void setToLowerCase(boolean toLowerCase)
@OptionMetadata(displayName="standarize URLs and @user mentions", description="Reduce the attribute space by replacing user mentions and URLs with generic tokens.", commandLineParamIsFlag=true, commandLineParamName="stan", commandLineParamSynopsis="-stan", displayOrder=2) public boolean isStandarizeUrlsUsers()
public void setStandarizeUrlsUsers(boolean standarizeUrlsUsers)
@OptionMetadata(displayName="reduceRepeatedLetters", description="Reduce the attribute space by replacing sequences of letters occurring more than two times in a row with two occurrences of them (e.g., huuungry is reduced to huungry, loooove to loove)", commandLineParamIsFlag=true, commandLineParamName="red", commandLineParamSynopsis="-red", displayOrder=2) public boolean isReduceRepeatedLetters()
public void setReduceRepeatedLetters(boolean reduceRepeatedLetters)
@OptionMetadata(displayName="tokenizer", description="The tokenizing algorithm to use on the tweets. Uses the CMU TweetNLP tokenizer as default", commandLineParamName="tokenizer", commandLineParamSynopsis="-tokenizer <string>", displayOrder=3) public Tokenizer getTokenizer()
public void setTokenizer(Tokenizer m_tokenizer)
@OptionMetadata(displayName="stemmer", description="The stemming algorithm to use on the words. Default: no stemming.", commandLineParamName="stemmer", commandLineParamSynopsis="-stemmer <string>", displayOrder=4) public Stemmer getStemmer()
public void setStemmer(Stemmer m_stemmer)
@OptionMetadata(displayName="stopwordsHandler", description="The stopwords handler to use (Null means no stopwords are used).", commandLineParamName="stopwords-handler", commandLineParamSynopsis="-stopwords-handler <string>", displayOrder=5) public StopwordsHandler getStopwordsHandler()
public void setStopwordsHandler(StopwordsHandler m_stopwordsHandler)