import java.text.Normalizer;
import java.util.HashMap;
import java.util.Map;

/**
 * Halfwidth and Fullwidth Character Normalization for CJK
 * http://s...content-available-to-author-only...s.asia
 *
 * See the Unicode Standard 6.0 – Halfwidth and Fullwidth Forms
 * http://u...content-available-to-author-only...e.org/charts/PDF/UFF00.pdf
 *
 * For Chinese, Japanese and Korean, some characters have Unicode mappings to
 * both a halfwidth and a fullwidth version. This code normalizes them
 * to halfwidth for Latin characters, numbers and punctuation, and to fullwidth
 * for everything else.
 * This is sufficient for halfwidth/fullwidth normalization, but it is not fully
 * equivalent to Unicode NFKC normalization.
 */
public class Main{
	
	/** Combining voiced sound mark (dakuten); replacement for halfwidth U+FF9E. */
	private static final char COMBINING_VOICED_MARK = '\u3099';
	
	/** Combining semi-voiced sound mark (handakuten); replacement for halfwidth U+FF9F. */
	private static final char COMBINING_SEMI_VOICED_MARK = '\u309a';
	
	/**
	 * Width-normalization lookup table.
	 * Key: the original character. Value: its replacement character.
	 * Characters that are absent from the table are left untouched.
	 */
	private static final Map<Character, Character> charCodeMap;
	static {
		charCodeMap = new HashMap<Character, Character>();
		// TO HALFWIDTH CHARACTERS
		// ASCII variants (Latin Symbols, Punctuation, Numbers, and Alphabet):
		// U+FF01..U+FF5E sit at a constant offset of 0xFEE0 above U+0021..U+007E.
		for (char key = '\uff01'; key <= '\uff5e'; key++) {
			char value = (char) (key - '\ufee0');
			charCodeMap.put(key, value);
		}
		// Brackets
		charCodeMap.put('\uff5f', '\u2985'); // left white parenthesis
		charCodeMap.put('\uff60', '\u2986'); // right white parenthesis
		// Symbol Variants
		charCodeMap.put('\uffe0', '\u00a2'); // Cent sign
		charCodeMap.put('\uffe1', '\u00a3'); // Pound sign
		charCodeMap.put('\uffe2', '\u00ac'); // Not sign
		charCodeMap.put('\uffe3', '\u00af'); // Macron
		charCodeMap.put('\uffe4', '\u00a6'); // Broken Bar
		charCodeMap.put('\uffe5', '\u00a5'); // Yen sign
		charCodeMap.put('\uffe6', '\u20a9'); // Won sign
		// Space (strictly speaking not listed in Unicode 6.0 Halfwidth and
		// Fullwidth forms but including here as the ideographic space can
		// cause issues)
		charCodeMap.put('\u3000', '\u0020'); // SPACE
		// TO FULLWIDTH CHARACTERS
		// CJK punctuation
		charCodeMap.put('\uff61', '\u3002'); // ideographic full stop
		charCodeMap.put('\uff62', '\u300c'); // left corner bracket
		charCodeMap.put('\uff63', '\u300d'); // right corner bracket
		charCodeMap.put('\uff64', '\u3001'); // ideographic comma
		// Katakana variants
		charCodeMap.put('\uff65', '\u30fb'); // Middle Dot
		charCodeMap.put('\uff66', '\u30f2'); // Wo
		charCodeMap.put('\uff67', '\u30a1'); // A small
		charCodeMap.put('\uff68', '\u30a3'); // I small
		charCodeMap.put('\uff69', '\u30a5'); // U small
		charCodeMap.put('\uff6a', '\u30a7'); // E small
		charCodeMap.put('\uff6b', '\u30a9'); // O small
		charCodeMap.put('\uff6c', '\u30e3'); // Ya small
		charCodeMap.put('\uff6d', '\u30e5'); // Yu small
		charCodeMap.put('\uff6e', '\u30e7'); // Yo small
		charCodeMap.put('\uff6f', '\u30c3'); // Tsu small
		charCodeMap.put('\uff70', '\u30fc'); // Prolonged Sound Mark
		charCodeMap.put('\uff71', '\u30a2'); // A
		charCodeMap.put('\uff72', '\u30a4'); // I
		charCodeMap.put('\uff73', '\u30a6'); // U
		charCodeMap.put('\uff74', '\u30a8'); // E
		charCodeMap.put('\uff75', '\u30aa'); // O
		charCodeMap.put('\uff76', '\u30ab'); // Ka
		charCodeMap.put('\uff77', '\u30ad'); // Ki
		charCodeMap.put('\uff78', '\u30af'); // Ku
		charCodeMap.put('\uff79', '\u30b1'); // Ke
		charCodeMap.put('\uff7a', '\u30b3'); // Ko
		charCodeMap.put('\uff7b', '\u30b5'); // Sa
		charCodeMap.put('\uff7c', '\u30b7'); // Shi
		charCodeMap.put('\uff7d', '\u30b9'); // Su
		charCodeMap.put('\uff7e', '\u30bb'); // Se
		charCodeMap.put('\uff7f', '\u30bd'); // So
		charCodeMap.put('\uff80', '\u30bf'); // Ta
		charCodeMap.put('\uff81', '\u30c1'); // Chi
		charCodeMap.put('\uff82', '\u30c4'); // Tsu
		charCodeMap.put('\uff83', '\u30c6'); // Te
		charCodeMap.put('\uff84', '\u30c8'); // To
		charCodeMap.put('\uff85', '\u30ca'); // Na
		charCodeMap.put('\uff86', '\u30cb'); // Ni
		charCodeMap.put('\uff87', '\u30cc'); // Nu
		charCodeMap.put('\uff88', '\u30cd'); // Ne
		charCodeMap.put('\uff89', '\u30ce'); // No
		charCodeMap.put('\uff8a', '\u30cf'); // Ha
		charCodeMap.put('\uff8b', '\u30d2'); // Hi
		charCodeMap.put('\uff8c', '\u30d5'); // Hu
		charCodeMap.put('\uff8d', '\u30d8'); // He
		charCodeMap.put('\uff8e', '\u30db'); // Ho
		charCodeMap.put('\uff8f', '\u30de'); // Ma
		charCodeMap.put('\uff90', '\u30df'); // Mi
		charCodeMap.put('\uff91', '\u30e0'); // Mu
		charCodeMap.put('\uff92', '\u30e1'); // Me
		charCodeMap.put('\uff93', '\u30e2'); // Mo
		charCodeMap.put('\uff94', '\u30e4'); // Ya
		charCodeMap.put('\uff95', '\u30e6'); // Yu
		charCodeMap.put('\uff96', '\u30e8'); // Yo
		charCodeMap.put('\uff97', '\u30e9'); // Ra
		charCodeMap.put('\uff98', '\u30ea'); // Ri
		charCodeMap.put('\uff99', '\u30eb'); // Ru
		charCodeMap.put('\uff9a', '\u30ec'); // Re
		charCodeMap.put('\uff9b', '\u30ed'); // Ro
		charCodeMap.put('\uff9c', '\u30ef'); // Wa
		charCodeMap.put('\uff9d', '\u30f3'); // N
		charCodeMap.put('\uff9e', COMBINING_VOICED_MARK); // Voiced Sound Mark
		charCodeMap.put('\uff9f', COMBINING_SEMI_VOICED_MARK); // Semi-Voiced Sound Mark
		// Hangul variants
		charCodeMap.put('\uffa0', '\u3164'); // Hangul Filler
		// Hangul First Range
		// KIYEOK to HIEUH (U+FFA1..U+FFBE -> U+3131..U+314E)
		for (char key = '\uffa1'; key <= '\uffbe'; key++) {
			char value = (char) (key - '\uce70');
			charCodeMap.put(key, value);
		}
		// Hangul Second Range
		// A to E (U+FFC2..U+FFC7 -> U+314F..U+3154)
		for (char key = '\uffc2'; key <= '\uffc7'; key++) {
			char value = (char) (key - '\uce73');
			charCodeMap.put(key, value);
		}
		// Hangul Third Range
		// YEO to OE (U+FFCA..U+FFCF -> U+3155..U+315A)
		for (char key = '\uffca'; key <= '\uffcf'; key++) {
			char value = (char) (key - '\uce75');
			charCodeMap.put(key, value);
		}
		// Hangul Fourth Range
		// YO to YU (U+FFD2..U+FFD7 -> U+315B..U+3160)
		for (char key = '\uffd2'; key <= '\uffd7'; key++) {
			char value = (char) (key - '\uce77');
			charCodeMap.put(key, value);
		}
		// More Hangul variants
		charCodeMap.put('\uffda', '\u3161'); // Hangul EU
		charCodeMap.put('\uffdb', '\u3162'); // Hangul YI
		charCodeMap.put('\uffdc', '\u3163'); // Hangul I
		// Symbol Variants
		charCodeMap.put('\uffe8', '\u2502'); // Forms Light Vertical
		charCodeMap.put('\uffe9', '\u2190'); // Leftwards Arrow
		charCodeMap.put('\uffea', '\u2191'); // Upwards Arrow
		charCodeMap.put('\uffeb', '\u2192'); // Rightwards Arrow
		charCodeMap.put('\uffec', '\u2193'); // Downwards Arrow
		charCodeMap.put('\uffed', '\u25a0'); // Black Square
		charCodeMap.put('\uffee', '\u25cb'); // White Circle
	}
	
	/**
	 * Normalizes the width variants in {@code text}: fullwidth ASCII variants,
	 * fullwidth symbols and the ideographic space become their narrow
	 * counterparts, while halfwidth CJK punctuation, katakana, Hangul and
	 * symbol forms become their standard (wide) counterparts. All other
	 * characters are copied through unchanged.
	 *
	 * <p>A halfwidth voiced or semi-voiced sound mark (U+FF9E / U+FF9F) is
	 * merged with the character before it whenever Unicode defines a
	 * precomposed form for the pair, so halfwidth and fullwidth spellings of
	 * the same word produce equal strings. When no precomposed form exists
	 * (or the mark is the first character) the combining mark U+3099 / U+309A
	 * is emitted on its own. Combining marks that are already present in the
	 * input are not touched.
	 *
	 * @param text the string to normalize
	 * @return the normalized string; it may be shorter than {@code text} when
	 *         sound marks were merged into the preceding character
	 * @throws NullPointerException if {@code text} is {@code null}
	 */
	public static String normalize(String text) {
		if (text == null) {
			throw new NullPointerException("text must not be null");
		}
		StringBuilder out = new StringBuilder(text.length());
		for (int i = 0; i < text.length(); i++) {
			char original = text.charAt(i);
			// Single lookup: a null result means "no width variant, keep as is".
			Character replacement = charCodeMap.get(original);
			if (replacement == null) {
				out.append(original);
				continue;
			}
			char mapped = replacement.charValue();
			boolean isSoundMark = mapped == COMBINING_VOICED_MARK
					|| mapped == COMBINING_SEMI_VOICED_MARK;
			if (isSoundMark && composeWithPrevious(out, mapped)) {
				// The mark was folded into the previous character; nothing to append.
				continue;
			}
			out.append(mapped);
		}
		return out.toString();
	}
	
	/**
	 * Tries to merge the combining {@code mark} into the last character of
	 * {@code out}. The pair is run through NFC composition; if that yields a
	 * single character, the last character of {@code out} is replaced by it.
	 *
	 * @return {@code true} if the mark was merged, {@code false} if
	 *         {@code out} is empty or the pair has no precomposed form
	 */
	private static boolean composeWithPrevious(StringBuilder out, char mark) {
		int last = out.length() - 1;
		if (last < 0) {
			return false;
		}
		String pair = new String(new char[] {out.charAt(last), mark});
		String composed = Normalizer.normalize(pair, Normalizer.Form.NFC);
		if (composed.length() != 1) {
			return false;
		}
		out.setCharAt(last, composed.charAt(0));
		return true;
	}
	
	/**
	 * Demo entry point: prints three sample strings (fullwidth Latin,
	 * fullwidth katakana and halfwidth katakana) next to their normalized
	 * forms on standard output.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) {
		String[] unnormalized = {"Ａｓｉａ", "アジア", "ｱｼﾞｱ"};
		for (String sample : unnormalized) {
			System.out.println("Unnormalized:\t " + sample);
			System.out.println("Normalized:\t " + normalize(sample));
		}
	}
	
}