|
||||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | |||||||||
java.lang.Objectkr.ac.kaist.swrc.jhannanum.plugin.MajorPlugin.MorphAnalyzer.ChartMorphAnalyzer.MorphemeChart
public class MorphemeChart
This class is for the lattice style morpheme chart, which is an internal data structure for morphological analysis without backtracking.
| Nested Class Summary | |
|---|---|
class |
MorphemeChart.Morpheme
A morpheme node in the lattice style chart. |
| Field Summary | |
|---|---|
private java.lang.String |
bufString
string buffer |
MorphemeChart.Morpheme[] |
chart
the morpheme chart |
int |
chartEnd
the last index of the chart |
private static java.lang.String |
CHI_REPLACE
the reserved word for replacement of Chinese characters |
private int |
chiReplaceIndex
the index for replacement of Chinese characters |
private java.util.LinkedList<java.lang.String> |
chiReplacementList
the list for the replacement of Chinese characters |
private Connection |
connection
the connection rules |
private static java.lang.String |
ENG_REPLACE
the reserved word for replacement of English alphabets |
private int |
engReplaceIndex
the index for replacement of English alphabets |
private java.util.LinkedList<java.lang.String> |
engReplacementList
the list for the replacement of English alphabets |
private Exp |
exp
chart expansion |
private static int |
MAX_CANDIDATE_NUM
the maximum number of analysis results |
private static int |
MAX_MORPHEME_CHART
the maximum number of morpheme nodes in the chart |
private static int |
MAX_MORPHEME_CONNECTION
the maximum number of connections between one morpheme and others |
private static int |
MORPHEME_STATE_FAIL
the processing state - fail |
private static int |
MORPHEME_STATE_INCOMPLETE
the processing state - incomplete |
private static int |
MORPHEME_STATE_SUCCESS
the processing state - success |
private NumberDic |
numDic
number dictionary - automata |
private int |
printResultCnt
the number of analysis results printed |
private java.util.LinkedList<Eojeol> |
resEojeols
the list of eojeols analyzed |
private java.util.ArrayList<java.lang.String> |
resMorphemes
the list of morphemes analyzed |
private java.util.ArrayList<java.lang.String> |
resTags
the list of morpheme tags analyzed |
private int[] |
segmentPath
path of segmentation |
private Simti |
simti
SIMple Trie Index |
private SegmentPosition |
sp
segment position |
private Trie |
systemDic
system morpheme dictionary |
private TagSet |
tagSet
the morpheme tag set |
private Trie |
userDic
user morpheme dictionary |
| Constructor Summary | |
|---|---|
MorphemeChart(TagSet tagSet,
Connection connection,
Trie systemDic,
Trie userDic,
NumberDic numDic,
Simti simti,
java.util.LinkedList<Eojeol> resEojeolList)
Constructor. |
|
| Method Summary | |
|---|---|
int |
addMorpheme(int tag,
int phoneme,
int nextPosition,
int nextTagType)
Adds a new morpheme to the chart. |
int |
altSegment(java.lang.String str)
It inserts the reverse of the given string into the SIMTI data structure. |
int |
analyze()
It performs morphological analysis on the morpheme chart constructed. |
private int |
analyze(int chartIndex,
int tagType)
It performs morphological analysis on the morpheme chart from the specified index in the chart. |
private int |
analyzeUnknown()
It segments all phonemes, and tags 'unknown' to each segment, and then performs chart analysis, so that the eojeols that consist of morphemes not in the dictionaries can be processed. |
boolean |
checkChart(int[] morpheme,
int morphemeLen,
int tag,
int phoneme,
int nextPosition,
int nextTagType,
java.lang.String str)
Checks whether the specified morpheme exists in the morpheme chart. |
void |
getResult()
Generates the morphological analysis result based on the morpheme chart where the analysis is performed. |
void |
init(java.lang.String word)
Initializes the morpheme chart with the specified word. |
void |
phonemeChange(int from,
java.lang.String front,
java.lang.String back,
int ftag,
int btag,
int phoneme)
It expands the morpheme chart to deal with the phoneme change phenomenon. |
private java.lang.String |
preReplace(java.lang.String str)
Replaces the English alphabets and Chinese characters in the specified string with the reserved words. |
private void |
printChart(int chartIndex)
It generates the final morphological analysis result from the morpheme chart. |
void |
printMorphemeAll()
It prints all the data in the chart to the console. |
| Methods inherited from class java.lang.Object |
|---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
| Field Detail |
|---|
private static final java.lang.String CHI_REPLACE
private static final java.lang.String ENG_REPLACE
private java.util.LinkedList<java.lang.String> chiReplacementList
private java.util.LinkedList<java.lang.String> engReplacementList
private int engReplaceIndex
private int chiReplaceIndex
private static final int MAX_MORPHEME_CONNECTION
private static final int MAX_MORPHEME_CHART
private static final int MORPHEME_STATE_INCOMPLETE
private static final int MORPHEME_STATE_SUCCESS
private static final int MAX_CANDIDATE_NUM
private static final int MORPHEME_STATE_FAIL
public MorphemeChart.Morpheme[] chart
public int chartEnd
private TagSet tagSet
private Connection connection
private SegmentPosition sp
private java.lang.String bufString
private int[] segmentPath
private Exp exp
private Trie systemDic
private Trie userDic
private NumberDic numDic
private Simti simti
private int printResultCnt
private java.util.LinkedList<Eojeol> resEojeols
private java.util.ArrayList<java.lang.String> resMorphemes
private java.util.ArrayList<java.lang.String> resTags
| Constructor Detail |
|---|
public MorphemeChart(TagSet tagSet,
Connection connection,
Trie systemDic,
Trie userDic,
NumberDic numDic,
Simti simti,
java.util.LinkedList<Eojeol> resEojeolList)
tagSet - - the morpheme tag set
connection - - the morpheme connection rules
systemDic - - the system morpheme dictionary
userDic - - the user morpheme dictionary
numDic - - the number dictionary
simti - - the SIMple Trie Index
resEojeolList - - the list of eojeols to store the analysis result
| Method Detail |
|---|
public int addMorpheme(int tag,
int phoneme,
int nextPosition,
int nextTagType)
tag - - the morpheme tag ID
phoneme - - phoneme
nextPosition - - the index of next morpheme
nextTagType - - the tag type of next morpheme
public int altSegment(java.lang.String str)
str - - string to insert to the SIMTI structure
public int analyze()
private int analyze(int chartIndex,
int tagType)
chartIndex - - the index of the chart to analyze
tagType - - the type of next morpheme
private int analyzeUnknown()
public boolean checkChart(int[] morpheme,
int morphemeLen,
int tag,
int phoneme,
int nextPosition,
int nextTagType,
java.lang.String str)
morpheme - - the list of indices of the morphemes to check
morphemeLen - - the length of the list
tag - - morpheme tag ID
phoneme - - phoneme
nextPosition - - the index of the next morpheme
nextTagType - - the type of the next morpheme tag
str - - plain string
public void getResult()
public void init(java.lang.String word)
word - - the plain string of an eojeol to analyze
public void phonemeChange(int from,
java.lang.String front,
java.lang.String back,
int ftag,
int btag,
int phoneme)
from - - the index of the start segment position
front - - the front part of the string
back - - the next part of the string
ftag - - the morpheme tag of the front part
btag - - the morpheme tag of the next part
phoneme - - phoneme
private void printChart(int chartIndex)
chartIndex - - the start index of the chart to generate final result
public void printMorphemeAll()
private java.lang.String preReplace(java.lang.String str)
str - - the string to replace English and Chinese characters
|
||||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | |||||||||