java.io.Serializable
, CapabilitiesHandler
, CapabilitiesIgnorer
, CommandlineRunnable
, OptionHandler
, RevisionHandler
public class LexiconDistantSupervision extends SimpleBatchFilter
Modifier and Type | Field | Description |
---|---|---|
static java.lang.String | LEXICON_FOLDER_NAME | Default path to where lexicons are stored |
Constructor | Description |
---|---|
LexiconDistantSupervision() |
Modifier and Type | Method | Description |
---|---|---|
boolean | allowAccessToFullInputFormat() | |
Capabilities | getCapabilities() | |
java.io.File | getLexicon() | |
java.lang.String[] | getOptions() | |
java.lang.String | getPolarityAttName() | |
java.lang.String | getPolarityAttNegValName() | |
java.lang.String | getPolarityAttPosValName() | |
TechnicalInformation | getTechnicalInformation() | Returns an instance of a TechnicalInformation object, containing detailed information about the technical background of this class, e.g., paper reference or book this class is based on. |
java.lang.String | getTextIndex() | |
Tokenizer | getTokenizer() | |
java.lang.String | globalInfo() | |
boolean | isRemoveMatchingWord() | |
java.util.Enumeration<Option> | listOptions() | |
static void | main(java.lang.String[] args) | Main method for testing this class. |
void | setLexicon(java.io.File lexicon) | |
void | setOptions(java.lang.String[] options) | Parses the options for this object. |
void | setPolarityAttName(java.lang.String polarityAttName) | |
void | setPolarityAttNegValName(java.lang.String polarityAttNegValName) | |
void | setPolarityAttPosValName(java.lang.String polarityAttPosValName) | |
void | setRemoveMatchingWord(boolean removeMatchingWord) | |
void | setTextIndex(java.lang.String textIndex) | |
void | setTokenizer(Tokenizer m_tokenizer) | |
batchFilterFile, debugTipText, doNotCheckCapabilitiesTipText, filterFile, getCapabilities, getCopyOfInputFormat, getDebug, getDoNotCheckCapabilities, getOutputFormat, getRevision, isFirstBatchDone, isNewBatch, isOutputFormatDefined, makeCopies, makeCopy, mayRemoveInstanceAfterFirstBatchDone, numPendingOutput, output, outputPeek, postExecution, preExecution, run, runFilter, setDebug, setDoNotCheckCapabilities, toString, useFilter, wekaStaticWrapper
equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
batchFinished, input
setInputFormat
public static java.lang.String LEXICON_FOLDER_NAME
public TechnicalInformation getTechnicalInformation()
public java.lang.String globalInfo()
globalInfo
in class SimpleFilter
public java.util.Enumeration<Option> listOptions()
listOptions
in interface OptionHandler
listOptions
in class Filter
public java.lang.String[] getOptions()
getOptions
in interface OptionHandler
getOptions
in class Filter
public void setOptions(java.lang.String[] options) throws java.lang.Exception
setOptions
in interface OptionHandler
setOptions
in class Filter
options
- the options to use
java.lang.Exception
- if setting of options fails
public Capabilities getCapabilities()
getCapabilities
in interface CapabilitiesHandler
getCapabilities
in class Filter
public boolean allowAccessToFullInputFormat()
allowAccessToFullInputFormat
in class SimpleBatchFilter
@OptionMetadata(displayName="textIndex", description="The index (starting from 1) of the target string attribute. First and last are valid values. ", commandLineParamName="I", commandLineParamSynopsis="-I <col>", displayOrder=0) public java.lang.String getTextIndex()
public void setTextIndex(java.lang.String textIndex)
@OptionMetadata(displayName="tokenizer", description="The tokenizing algorithm to use on the tweets. Uses the CMU TweetNLP tokenizer as default", commandLineParamName="tokenizer", commandLineParamSynopsis="-tokenizer <string>", displayOrder=1) public Tokenizer getTokenizer()
public void setTokenizer(Tokenizer m_tokenizer)
@OptionMetadata(displayName="lexicon", description="The file containing a lexicon in ARFF format with word polarities.", commandLineParamName="lex", commandLineParamSynopsis="-lex <string>", displayOrder=2) public java.io.File getLexicon()
public void setLexicon(java.io.File lexicon)
@OptionMetadata(displayName="polarityAttName", description="The lexicon attribute name with the word polarities. The attribute must be nominal. \t default: polarity", commandLineParamName="polatt", commandLineParamSynopsis="-polatt <string>", displayOrder=3) public java.lang.String getPolarityAttName()
public void setPolarityAttName(java.lang.String polarityAttName)
@OptionMetadata(displayName="polarityAttPosValName", description="The lexicon attribute value name for positive words. \t default: positive", commandLineParamName="posval", commandLineParamSynopsis="-posval <String>", displayOrder=4) public java.lang.String getPolarityAttPosValName()
public void setPolarityAttPosValName(java.lang.String polarityAttPosValName)
@OptionMetadata(displayName="polarityAttNegValName", description="The lexicon attribute value name for negative words. \t default: negative", commandLineParamName="negval", commandLineParamSynopsis="-negval <String>", displayOrder=5) public java.lang.String getPolarityAttNegValName()
public void setPolarityAttNegValName(java.lang.String polarityAttNegValName)
@OptionMetadata(displayName="removeMatchingWord", description="True for removing the words from the lexicon in the tweet. This avoids overfitting classifiers trained from the labelled data. \t default: TRUE", commandLineParamIsFlag=true, commandLineParamName="removeMatchingWord", commandLineParamSynopsis="-removeMatchingWord", displayOrder=6) public boolean isRemoveMatchingWord()
public void setRemoveMatchingWord(boolean removeMatchingWord)
public static void main(java.lang.String[] args)
args
- should contain arguments to the filter: use -h for help