java.io.Serializable
, CapabilitiesHandler
, CapabilitiesIgnorer
, CommandlineRunnable
, OptionHandler
, RevisionHandler
public class TweetToLexiconFeatureVector extends TweetToFeatureVector
@Article{BravoMarquez2014, Title = {Meta-level sentiment models for big social data analysis }, Author = {Felipe Bravo-Marquez and Marcelo Mendoza and Barbara Poblete}, Journal = {Knowledge-Based Systems }, Year = {2014}, Number = {0}, Pages = {86 - 99}, Volume = {69}, Doi = {http://dx.doi.org/10.1016/j.knosys.2014.05.016}, ISSN = {0950-7051}, Keywords = {Sentiment classification}, Url = {http://www.sciencedirect.com/science/article/pii/S0950705114002068} }
Modifier and Type | Field | Description |
---|---|---|
static java.lang.String | AFINN_FILE_NAME | The path of the AFINN lexicon. |
static java.lang.String | BING_LIU_FILE_NAME | The path of the BingLiu lexicon. |
static java.lang.String | EMOTICON_LIST_FILE_NAME | The path of the emoticon list. |
static java.lang.String | LEXICON_FOLDER_NAME | Default path to where lexicons are stored. |
static java.lang.String | MPQA_FILE_NAME | The path of the MPQA lexicon. |
static java.lang.String | NEGATION_LIST_FILE_NAME | The path of the negation list. |
static java.lang.String | NRC_HASH_EMO_FILE_NAME | The path of the NRC Hashtag Emotion lexicon. |
static java.lang.String | NRC_HASH_SENT_FILE_NAME | The path of the NRC-Hashtag-Sentiment lexicon. |
static java.lang.String | NRC10_EXPANDED_FILE_NAME | The path of the NRC-10-Expanded lexicon. |
static java.lang.String | NRC10_FILE_NAME | The path of the NRC-emotion lexicon. |
static java.lang.String | S140_FILE_NAME | The path of the S140 lexicon. |
static java.lang.String | SENTIWORDNET_FILE_NAME | The path of SentiWordnet. |
Constructor | Description |
---|---|
TweetToLexiconFeatureVector() | |
Modifier and Type | Method | Description |
---|---|---|
TechnicalInformation | getTechnicalInformation() | Returns an instance of a TechnicalInformation object, containing detailed information about the technical background of this class, e.g., paper reference or book this class is based on. |
java.lang.String | globalInfo() | Returns a string describing this filter. |
boolean | isUseAfinn() | |
boolean | isUseBingLiu() | |
boolean | isUseEmoticons() | |
boolean | isUseMpqa() | |
boolean | isUseNegation() | |
boolean | isUseNrc10() | |
boolean | isUseNrc10Expanded() | |
boolean | isUseNrcHashEmo() | |
boolean | isUseNrcHashSent() | |
boolean | isUseS140() | |
boolean | isUseSentiWordnet() | |
static void | main(java.lang.String[] args) | Main method for testing this class. |
void | setUseAfinn(boolean useAfinn) | |
void | setUseBingLiu(boolean useBingLiu) | |
void | setUseEmoticons(boolean useEmoticons) | |
void | setUseMpqa(boolean useMpqa) | |
void | setUseNegation(boolean useNegation) | |
void | setUseNrc10(boolean useNrc10) | |
void | setUseNrc10Expanded(boolean useNrc10Expanded) | |
void | setUseNrcHashEmo(boolean useNrcHashEmo) | |
void | setUseNrcHashSent(boolean useNrcHashSent) | |
void | setUseS140(boolean useS140) | |
void | setUseSentiWordnet(boolean useSentiWordnet) | |
batchFilterFile, debugTipText, doNotCheckCapabilitiesTipText, filterFile, getCapabilities, getCopyOfInputFormat, getDebug, getDoNotCheckCapabilities, getOutputFormat, getRevision, isFirstBatchDone, isNewBatch, isOutputFormatDefined, makeCopies, makeCopy, mayRemoveInstanceAfterFirstBatchDone, numPendingOutput, output, outputPeek, postExecution, preExecution, run, runFilter, setDebug, setDoNotCheckCapabilities, toString, useFilter, wekaStaticWrapper
equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
batchFinished, input
setInputFormat
allowAccessToFullInputFormat, getCapabilities, getOptions, getStemmer, getStopwordsHandler, getTextIndex, getTokenizer, isReduceRepeatedLetters, isStandarizeUrlsUsers, isToLowerCase, listOptions, setOptions, setReduceRepeatedLetters, setStandarizeUrlsUsers, setStemmer, setStopwordsHandler, setTextIndex, setTokenizer, setToLowerCase
public static java.lang.String LEXICON_FOLDER_NAME
public static java.lang.String MPQA_FILE_NAME
public static java.lang.String BING_LIU_FILE_NAME
public static java.lang.String AFINN_FILE_NAME
public static java.lang.String S140_FILE_NAME
public static java.lang.String NRC_HASH_SENT_FILE_NAME
public static java.lang.String NRC10_FILE_NAME
public static java.lang.String NRC10_EXPANDED_FILE_NAME
public static java.lang.String NRC_HASH_EMO_FILE_NAME
public static java.lang.String SENTIWORDNET_FILE_NAME
public static java.lang.String EMOTICON_LIST_FILE_NAME
public static java.lang.String NEGATION_LIST_FILE_NAME
public java.lang.String globalInfo()
globalInfo in class SimpleFilter
public TechnicalInformation getTechnicalInformation()
@OptionMetadata(displayName="useMpqa", description="Counts the number of positive and negative words from the MPQA subjectivity lexicon.\nMore info at: http://mpqa.cs.pitt.edu/lexicons/subj_lexicon/ \nPublication: Theresa Wilson, Janyce Wiebe, and Paul Hoffmann, Recognizing Contextual Polarity in Phrase-Level Sentiment Analysis.", commandLineParamIsFlag=true, commandLineParamName="A", commandLineParamSynopsis="-A", displayOrder=6) public boolean isUseMpqa()
public void setUseMpqa(boolean useMpqa)
@OptionMetadata(displayName="useBingLiu", description="Counts the number of positive and negative words from the Bing Liu lexicon.\nMore info at: https://www.cs.uic.edu/~liub/FBS/sentiment-analysis.html#lexicon \nPublication: Minqing Hu and Bing Liu, Mining and summarizing customer reviews.", commandLineParamIsFlag=true, commandLineParamName="D", commandLineParamSynopsis="-D", displayOrder=7) public boolean isUseBingLiu()
public void setUseBingLiu(boolean useBingLiu)
@OptionMetadata(displayName="useAfinn", description="Calculates a positive and negative score by aggregating the word associations provided by the AFINN lexicon.\nMore info at: http://www2.imm.dtu.dk/pubdb/views/publication_details.php?id=6010 \nPublication: Finn Arup Nielsen, A new ANEW: Evaluation of a word list for sentiment analysis in microblogs.", commandLineParamIsFlag=true, commandLineParamName="F", commandLineParamSynopsis="-F", displayOrder=8) public boolean isUseAfinn()
public void setUseAfinn(boolean useAfinn)
@OptionMetadata(displayName="useS140", description="Calculates a positive and negative score by aggregating the word associations provided by the S140 lexicon.\nMore info at: http://saifmohammad.com/WebPages/lexicons.html \nPublication: Svetlana Kiritchenko, Xiaodan Zhu and Saif Mohammad, Sentiment Analysis of Short Informal Texts.", commandLineParamIsFlag=true, commandLineParamName="H", commandLineParamSynopsis="-H", displayOrder=9) public boolean isUseS140()
public void setUseS140(boolean useS140)
@OptionMetadata(displayName="useNrcHashSent", description="Calculates a positive and negative score by aggregating the word associations provided by the NRC Hashtag Sentiment lexicon.\nMore info at: http://saifmohammad.com/WebPages/lexicons.html \nPublication: Svetlana Kiritchenko, Xiaodan Zhu and Saif Mohammad, Sentiment Analysis of Short Informal Texts.", commandLineParamIsFlag=true, commandLineParamName="J", commandLineParamSynopsis="-J", displayOrder=10) public boolean isUseNrcHashSent()
public void setUseNrcHashSent(boolean useNrcHashSent)
@OptionMetadata(displayName="useNrc10", description="Counts the number of words matching each emotion from the NRC Word-Emotion Association Lexicon.\nMore info at: http://saifmohammad.com/WebPages/NRC-Emotion-Lexicon.htm\nPublication: Saif Mohammad and Peter Turney, Crowdsourcing a Word-Emotion Association Lexicon.", commandLineParamIsFlag=true, commandLineParamName="L", commandLineParamSynopsis="-L", displayOrder=11) public boolean isUseNrc10()
public void setUseNrc10(boolean useNrc10)
@OptionMetadata(displayName="useNrc10Expanded", description="Adds the emotion associations of the words matching the Twitter Specific expansion of the NRC Word-Emotion Association Lexicon.\nMore info at: http://www.cs.waikato.ac.nz/ml/sa/lex.html#emolextwitter\nPublication: F. Bravo-Marquez, E. Frank, S. M. Mohammad, and B. Pfahringer, Determining Word--Emotion Associations from Tweets by Multi-Label Classification.", commandLineParamIsFlag=true, commandLineParamName="N", commandLineParamSynopsis="-N", displayOrder=12) public boolean isUseNrc10Expanded()
public void setUseNrc10Expanded(boolean useNrc10Expanded)
@OptionMetadata(displayName="useNrcHashEmo", description="Adds the emotion associations of the words matching the NRC Hashtag Emotion Association Lexicon.\nMore info at: http://saifmohammad.com/WebPages/lexicons.html\nPublication: Saif M. Mohammad, Svetlana Kiritchenko, Using Hashtags to Capture Fine Emotion Categories from Tweets.", commandLineParamIsFlag=true, commandLineParamName="P", commandLineParamSynopsis="-P", displayOrder=13) public boolean isUseNrcHashEmo()
public void setUseNrcHashEmo(boolean useNrcHashEmo)
@OptionMetadata(displayName="useSentiWordnet", description="Calculates positive and negative scores using SentiWordnet. We calculate a weighted average of the sentiment distributions of the synsets for word occuring in multiple synsets. The weights correspond to the reciprocal ranks of the senses in order to give higher weights to most popular senses. \nMore info at: http://sentiwordnet.isti.cnr.it/\nPublication: Stefano Baccianella, Andrea Esuli, and Fabrizio Sebastiani, SENTIWORDNET 3.0: An Enhanced Lexical Resource for Sentiment Analysis and Opinion Mining.", commandLineParamIsFlag=true, commandLineParamName="Q", commandLineParamSynopsis="-Q", displayOrder=14) public boolean isUseSentiWordnet()
public void setUseSentiWordnet(boolean useSentiWordnet)
@OptionMetadata(displayName="useEmoticons", description="Calculates a positive and a negative score by aggregating the word associations provided by a list of emoticons.\nThe list is taken from the AFINN project.\nMore info at: https://github.com/fnielsen/afinn \n", commandLineParamIsFlag=true, commandLineParamName="R", commandLineParamSynopsis="-R", displayOrder=15) public boolean isUseEmoticons()
public void setUseEmoticons(boolean useEmoticons)
@OptionMetadata(displayName="useNegation", description="Counts the number of negating words in the tweet.", commandLineParamIsFlag=true, commandLineParamName="T", commandLineParamSynopsis="-T", displayOrder=16) public boolean isUseNegation()
public void setUseNegation(boolean useNegation)
public static void main(java.lang.String[] args)
args - should contain arguments to the filter: use -h for help