java.io.Serializable
, CapabilitiesHandler
, CapabilitiesIgnorer
, CommandlineRunnable
, OptionHandler
, RevisionHandler
public class PTCM extends DistantSupervisionSyntheticFilter
CLUSTPREFIX, LEXICON_FOLDER_NAME, RESOURCES_FOLDER_NAME, UNIPREFIX
Constructor | Description |
---|---|
PTCM() |
Modifier and Type | Method | Description |
---|---|---|
void |
computeWordVecsAndVoc(Instances inputFormat) |
Calculates the vocabulary and the word vectors from an Instances object.
The vocabulary is only extracted the first time the filter is run.
|
int |
getMinInstDocs() |
|
int |
getPartNumber() |
|
TechnicalInformation |
getTechnicalInformation() |
Returns an instance of a TechnicalInformation object, containing
detailed information about the technical background of this class,
e.g., paper reference or book this class is based on.
|
java.lang.String |
globalInfo() |
|
static void |
main(java.lang.String[] args) |
Main method for testing this class.
|
void |
setMinInstDocs(int minInstDocs) |
|
void |
setPartNumber(int partNumber) |
calculateDocVec, getLexicon, getMinAttDocs, getPolarityAttName, getPolarityAttNegValName, getPolarityAttPosValName, getRandomSeed, getWordClustFile, isCreateClustAtts, isCreateWordAtts, mapTargetInstance, setCreateClustAtts, setCreateWordAtts, setLexicon, setMinAttDocs, setPolarityAttName, setPolarityAttNegValName, setPolarityAttPosValName, setRandomSeed, setWordClustFile
batchFilterFile, debugTipText, doNotCheckCapabilitiesTipText, filterFile, getCapabilities, getCopyOfInputFormat, getDebug, getDoNotCheckCapabilities, getOutputFormat, getRevision, isFirstBatchDone, isNewBatch, isOutputFormatDefined, makeCopies, makeCopy, mayRemoveInstanceAfterFirstBatchDone, numPendingOutput, output, outputPeek, postExecution, preExecution, run, runFilter, setDebug, setDoNotCheckCapabilities, toString, useFilter, wekaStaticWrapper
equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
batchFinished, input
setInputFormat
allowAccessToFullInputFormat, getCapabilities, getOptions, getStemmer, getStopwordsHandler, getTextIndex, getTokenizer, isReduceRepeatedLetters, isStandarizeUrlsUsers, isToLowerCase, listOptions, setOptions, setReduceRepeatedLetters, setStandarizeUrlsUsers, setStemmer, setStopwordsHandler, setTextIndex, setTokenizer, setToLowerCase
public java.lang.String globalInfo()
globalInfo
in class SimpleFilter
public TechnicalInformation getTechnicalInformation()
public void computeWordVecsAndVoc(Instances inputFormat)
inputFormat
- the input Instances
@OptionMetadata(displayName="minInstDocs", description="Minimum frequency of a word to be considered in the instance space.", commandLineParamName="N", commandLineParamSynopsis="-N <int>", displayOrder=13) public int getMinInstDocs()
public void setMinInstDocs(int minInstDocs)
@OptionMetadata(displayName="partNumber", description="The size of the partition for the tweet centroid model (-1 for not partionining). \t default: -1", commandLineParamName="A", commandLineParamSynopsis="-A <int>", displayOrder=14) public int getPartNumber()
public void setPartNumber(int partNumber)
public static void main(java.lang.String[] args)
args
- should contain arguments to the filter: use -h for help