/* GamMo Simple Morphological Analyzer
 *
 * version 0.14
 * Date 2006/11/21
 * Copyright (c) 2006 by knit
 * License: MIT License
 *   http://www.opensource.org/licenses/mit-license.php
 *   日本語訳
 *   http://www.opensource.jp/licenses/mit-license.html
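 * Sample
 *   HTML:
 *     <script type="text/javascript" src="gammo.js" charset="UTF-8"></script>
 *   JavaScript:
 *     var str = "今日はいい天気ですね。";
 *     var g = new GamMo(str);
 *     g.parse();
 *     // => ["今日は","いい","天気で","す","ね","。"]
 *     // (splits happen after particles and before kanji/katakana runs,
 *     //  so a particle stays attached to the word it follows)
 *
 *     g.select("kanji");
 *     // => ["今日","天気"]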
 */

/**
 * GamMo: a small regular-expression based segmenter for Japanese text.
 *
 * @constructor
 * @param {string} [str] Text to analyze. It is stored on `this.str` only when
 *     it is a string; otherwise `this.str` is left unset.
 */
function GamMo(str) {
  if (typeof str === "string") this.str = str;

  // Regular-expression source fragments, keyed by the type names accepted by
  // select(). Every entry is a single capture group, except "joshi"
  // (particles), which is a list of fragments that must be concatenated
  // (not joined with "|") to form one pattern:
  //   (particle)|(sentence-final particle)(punctuation)
  this.morphPattern = {
    "kanji": "([一-龠々〆ヵヶ]+)",
    "hiragana": "([ぁ-ん]+)",
    "katakana": "([ァ-ヴー]+)",
    "hankaku": "([a-zA-Z0-9]+)",
    "zenkaku": "([ａ-ｚＡ-Ｚ０-９]+)",
    "kigoh": "([,.、。！!？?()（）「」『』]+)",
    "space": "([ 　\n]+)",
    "joshi": [
      "(か[もら]?|きり|くせに|[くぐ]らい|けれども?|[ただ]って|こそ|さえ|し[かも]|すら|ずつ|たり|だ[けの]|ったら|ってば|つつ|[てで][はも]|とか?|ところ[がでか]|ながら|など|な[らり]|なん[かて]|の[でにみ]|ばかり|ほ[かど]|まで|ものの|やら?|ゆえ|より|をば?|[がしてでにのはばへも])|",
      "(か[いな]?|かしら|って|ったら|ってば|とも?|なあ?|ねえ?|ものか?|よう?|[さぜぞのやわ])([.?!、。？！]+)"
    ]
  };

  // Character classes that parse() splits on. Hiragana is not in this list,
  // so hiragana runs are only broken up by the particle pattern below.
  // The group order matters: parse() relies on kanji and katakana being
  // capture groups 1 and 2.
  var splitters = [
    this.morphPattern["kanji"],
    this.morphPattern["katakana"],
    this.morphPattern["hankaku"],
    this.morphPattern["zenkaku"],
    this.morphPattern["kigoh"],
    this.morphPattern["space"]
  ];

  // Built with the RegExp constructor; RegExp.prototype.compile is deprecated.
  this.re = new RegExp(splitters.join("|"), "g");
  this.joshi = new RegExp(this.morphPattern["joshi"].join(""), "g");
}


/**
 * Splits text into an array of tokens.
 *
 * Boundaries are inserted after particles, around sentence-final particles
 * that are followed by punctuation, before kanji/katakana runs, and on both
 * sides of half-width, full-width, punctuation and whitespace runs.
 *
 * @param {string} [str] Text to split. When omitted (undefined or null) the
 *     text given to the constructor is used.
 * @returns {string[]} The non-empty tokens in order of appearance; an empty
 *     array when there is no string to parse or the string is empty.
 */
GamMo.prototype.parse = function(str) {
  // Only fall back when no argument was supplied; an explicit "" is a valid
  // (empty) input and must not be replaced by the stored text.
  if (str == null) str = this.str;

  var result = [];
  if (typeof str !== "string") return result;

  // Boundaries are marked in-band, so the marker must not occur in the input.
  // Search the Unicode private use area (U+E000-U+F8FF), which none of the
  // patterns match, for a character that is absent from the text. A literal
  // "|" in the input is therefore kept as ordinary text.
  var sepCode = 0xE000;
  while (sepCode < 0xF8FF && str.indexOf(String.fromCharCode(sepCode)) !== -1) {
    sepCode++;
  }
  var sep = String.fromCharCode(sepCode);

  // Particle handling.
  var marked = str.replace(this.joshi, function(match, particle, finalParticle, punctuation) {
    // Sentence-final particle: split before and after it.
    if (punctuation) return sep + finalParticle + sep + punctuation;
    // Any other particle: split after it.
    return particle + sep;
  });

  marked = marked.replace(this.re, function(match, kanji, katakana) {
    // Kanji and katakana runs: split before only, so trailing hiragana
    // stays attached to them.
    if (kanji || katakana) return sep + match;
    // Half-width, full-width, punctuation, whitespace: split on both sides.
    return sep + match + sep;
  });

  // Adjacent or leading/trailing markers produce empty pieces; drop them.
  var pieces = marked.split(sep);
  for (var i = 0; i < pieces.length; i++) {
    if (pieces[i] !== "") result.push(pieces[i]);
  }
  return result;
};


/**
 * Extracts every substring of the constructor text that matches the given
 * character types.
 *
 * @param {...string} [types] Keys of `this.morphPattern` ("kanji",
 *     "hiragana", "katakana", "hankaku", "zenkaku", "kigoh", "space",
 *     "joshi"). Unknown names are ignored. When the first argument is
 *     missing or falsy, all types are used.
 * @returns {string[]} The matches in order of appearance; an empty array when
 *     nothing matches, no known type was requested, or no text is stored.
 */
GamMo.prototype.select = function() {
  var str = this.str;
  var hasOwn = Object.prototype.hasOwnProperty;
  var fragments = [];
  var key;

  if (arguments[0]) {
    for (var i = 0; i < arguments.length; i++) {
      key = arguments[i];
      // Own keys only, so names such as "constructor" are not treated as types.
      if (hasOwn.call(this.morphPattern, key) && this.morphPattern[key]) {
        fragments.push(this.morphPattern[key]);
      }
    }
  } else {
    for (key in this.morphPattern) {
      if (hasOwn.call(this.morphPattern, key)) fragments.push(this.morphPattern[key]);
    }
  }

  // List-valued entries ("joshi") are concatenated, exactly as the constructor
  // does; letting join("|") stringify the list would insert a literal comma.
  var pattern = [];
  for (var j = 0; j < fragments.length; j++) {
    pattern.push(typeof fragments[j] === "string" ? fragments[j] : fragments[j].join(""));
  }

  // An empty pattern would match the empty string at every position.
  if (typeof str !== "string" || pattern.length === 0) return [];

  // String.prototype.match returns null when nothing matches.
  return str.match(new RegExp(pattern.join("|"), "g")) || [];
};

/**
 * Factory wrapper around the GamMo constructor, for callers that prefer
 * a plain function call over `new`.
 *
 * @param {string} [str] Text to analyze; passed straight to GamMo.
 * @returns {GamMo} A newly constructed analyzer.
 */
function Gammo(str) {
  var analyzer = new GamMo(str);
  return analyzer;
}