java.io.Serializable
, CapabilitiesHandler
, CapabilitiesIgnorer
, CommandlineRunnable
, OptionHandler
, RevisionHandler
public class TweetNLPPOSTagger extends SimpleBatchFilter
@InProceedings{twitterNLP, Title = {Part-of-speech tagging for twitter: Annotation, features, and experiments}, Author = {Gimpel, Kevin and Schneider, Nathan and O'Connor, Brendan and Das, Dipanjan and Mills, Daniel and Eisenstein, Jacob and Heilman, Michael and Yogatama, Dani and Flanigan, Jeffrey and Smith, Noah A}, Booktitle = {Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies: short papers-Volume 2}, Year = {2011}, Organization = {Association for Computational Linguistics}, Pages = {42--47} }
Modifier and Type | Field | Description |
---|---|---|
static java.lang.String |
RESOURCES_FOLDER_NAME |
Default path to where resources are stored.
|
Constructor | Description |
---|---|
TweetNLPPOSTagger() |
Modifier and Type | Method | Description |
---|---|---|
boolean |
allowAccessToFullInputFormat() |
|
Capabilities |
getCapabilities() |
|
java.lang.String[] |
getOptions() |
|
java.lang.String |
getSeparator() |
|
java.io.File |
getTaggerFile() |
|
TechnicalInformation |
getTechnicalInformation() |
Returns an instance of a TechnicalInformation object, containing
detailed information about the technical background of this class,
e.g., paper reference or book this class is based on.
|
java.lang.String |
getTextIndex() |
|
Tokenizer |
getTokenizer() |
|
java.lang.String |
globalInfo() |
|
java.util.Enumeration<Option> |
listOptions() |
|
static void |
main(java.lang.String[] args) |
Main method for testing this class.
|
void |
setOptions(java.lang.String[] options) |
Parses the options for this object.
|
void |
setSeparator(java.lang.String separator) |
|
void |
setTaggerFile(java.io.File taggerFile) |
|
void |
setTextIndex(java.lang.String textIndex) |
|
void |
setTokenizer(Tokenizer m_tokenizer) |
|
java.lang.String |
tagTweet(java.lang.String content) |
Returns a POS-tagged String from a given String using the CMU TweetNLP tool.
|
batchFilterFile, debugTipText, doNotCheckCapabilitiesTipText, filterFile, getCapabilities, getCopyOfInputFormat, getDebug, getDoNotCheckCapabilities, getOutputFormat, getRevision, isFirstBatchDone, isNewBatch, isOutputFormatDefined, makeCopies, makeCopy, mayRemoveInstanceAfterFirstBatchDone, numPendingOutput, output, outputPeek, postExecution, preExecution, run, runFilter, setDebug, setDoNotCheckCapabilities, toString, useFilter, wekaStaticWrapper
equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
batchFinished, input
setInputFormat
public static java.lang.String RESOURCES_FOLDER_NAME
public TechnicalInformation getTechnicalInformation()
public java.lang.String globalInfo()
globalInfo
in class SimpleFilter
public java.util.Enumeration<Option> listOptions()
listOptions
in interface OptionHandler
listOptions
in class Filter
public java.lang.String[] getOptions()
getOptions
in interface OptionHandler
getOptions
in class Filter
public void setOptions(java.lang.String[] options) throws java.lang.Exception
setOptions
in interface OptionHandler
setOptions
in class Filter
options
- the options to use
java.lang.Exception
- if setting of options fails
public Capabilities getCapabilities()
getCapabilities
in interface CapabilitiesHandler
getCapabilities
in class Filter
public boolean allowAccessToFullInputFormat()
allowAccessToFullInputFormat
in class SimpleBatchFilter
public java.lang.String tagTweet(java.lang.String content)
content
- the String
@OptionMetadata(displayName="textIndex", description="The index (starting from 1) of the target string attribute. First and last are valid values. ", commandLineParamName="I", commandLineParamSynopsis="-I <col>", displayOrder=0) public java.lang.String getTextIndex()
public void setTextIndex(java.lang.String textIndex)
@OptionMetadata(displayName="tokenizer", description="The tokenizing algorithm to use on the tweets. Uses the CMU TweetNLP tokenizer as default", commandLineParamName="tokenizer", commandLineParamSynopsis="-tokenizer <string>", displayOrder=1) public Tokenizer getTokenizer()
public void setTokenizer(Tokenizer m_tokenizer)
@OptionMetadata(displayName="taggerFile", description="The file with TweetNLP POS tagger model.", commandLineParamName="taggerFile", commandLineParamSynopsis="-taggerFile <string>", displayOrder=2) public java.io.File getTaggerFile()
public void setTaggerFile(java.io.File taggerFile)
@OptionMetadata(displayName="separator", description="The separator String between the token and the POS tag.", commandLineParamName="separator", commandLineParamSynopsis="-separator <string>", displayOrder=3) public java.lang.String getSeparator()
public void setSeparator(java.lang.String separator)
public static void main(java.lang.String[] args)
args
- should contain arguments to the filter: use -h for help