对汉字字符串按照拼音排序
2004-11-14 11:15:50
今天上水木发现有人问怎样对汉字字符串按照拼音排序，查了一遍文档，发现Java缺省对字符串是按照每个Unicode字符的编码大小排序的。接着查到（要是没有了Google，世界将会怎样？）有一个java.text.Collator类，但是论坛上有人说这个类没用，和缺省的排序效果一样，要用C#写一个类生成Java程序。这要是真的，那还学Java干嘛？
于是把别人的Test.java改了改,分别用指定的Locale和不指定Locale试了试,结果都是OK的: import java.util.*; import java.text.*; public class Test2 { public static void testDefault() { String[] arr = {"张三", "李四", "王五", "刘六"}; Arrays.sort(arr); for (int i = 0; i < arr.length; i++) System.out.println(arr[i]); System.out.println(); } public static void testChinaLocale() { //Comparator cmp = (RuleBasedCollator)java.text.Collator.getInstance(java.util.Locale.CHINA);//try testing various locales Comparator cmp = Collator.getInstance(java.util.Locale.CHINA); String[] arr = {"张三", "李四", "王五", "刘六"}; Arrays.sort(arr, cmp); for (int i = 0; i < arr.length; i++) System.out.println(arr[i]); System.out.println(); } public static void testDefaultLocale() { //Comparator cmp = (RuleBasedCollator)java.text.Collator.getInstance(java.util.Locale.CHINA);//try testing various locales Comparator cmp = Collator.getInstance(); String[] arr = {"张三", "李四", "王五", "刘六"}; Arrays.sort(arr, cmp); for (int i = 0; i < arr.length; i++) System.out.println(arr[i]); System.out.println(); } public static void main(String args[]) { testDefault(); testChinaLocale(); testDefaultLocale(); } } 输出结果: 刘六 张三 李四 王五 李四 刘六 王五 张三 李四 刘六 王五 张三 看来Java还不是那么差嘛。至于那位老兄,估计是缺省的Locale有问题。
// Below is test code, found on java.net via Google, that sorts the characters
// within a string by pinyin:
import java.util.*;
import java.io.*;
import java.text.*;

/**
 * Comparator that orders text with the collation rules of {@code Locale.CHINA}.
 *
 * <p>It accepts both {@link CollationKey} and {@link String} arguments, so the same
 * instance can sort the per-character keys built by {@link #doSort(String)} as well
 * as a plain {@code String[]}.
 *
 * <p>NOTE: this source contains non-ASCII string literals; compile it with an
 * encoding that matches the file (for example {@code javac -encoding UTF-8}).
 */
public class CharSort implements Comparator {

    /** Collator used for every comparison; custom rules can be installed on this instance. */
    java.text.RuleBasedCollator collator;

    /** Creates a sorter backed by the collator for {@code Locale.CHINA}. */
    CharSort() {
        // Try other locales here to compare their collation rules.
        collator = (RuleBasedCollator) java.text.Collator.getInstance(java.util.Locale.CHINA);
    }

    /**
     * Sorts the individual {@code char}s of {@code str} with this comparator and writes
     * them, followed by a line separator, to the file {@code cjk.txt} encoded as UTF-8.
     *
     * @param str the text whose characters are sorted
     * @throws java.io.IOException if the file cannot be opened or written
     */
    public void doSort(String str) throws java.io.IOException {
        java.text.CollationKey[] keys = new java.text.CollationKey[str.length()];
        for (int i = 0; i < keys.length; i++) {
            keys[i] = collator.getCollationKey(str.substring(i, i + 1));
        }
        java.util.Arrays.sort(keys, this);

        BufferedWriter bw = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream("cjk.txt"), "UTF8"));
        try {
            for (int i = 0; i < keys.length; i++) {
                bw.write(keys[i].getSourceString());
            }
            bw.newLine();
        } finally {
            // Close even when a write fails so the file handle is not leaked.
            bw.close();
        }
    }

    /**
     * Compares two values by the collation order of their text.
     *
     * @param c1 a {@link CollationKey} or {@link String}
     * @param c2 a {@link CollationKey} or {@link String}
     * @return a negative, zero or positive number as {@code c1} sorts before, equal to
     *         or after {@code c2}
     * @throws IllegalArgumentException if either argument is {@code null} or of any
     *         other type
     */
    public int compare(Object c1, Object c2) throws IllegalArgumentException {
        return collator.compare(textOf(c1), textOf(c2));
    }

    /** Extracts the text to collate from a {@code CollationKey} or {@code String}. */
    private static String textOf(Object item) {
        if (item instanceof CollationKey) {
            return ((CollationKey) item).getSourceString();
        }
        if (item instanceof String) {
            return (String) item;
        }
        throw new IllegalArgumentException(
                "CharSort compares only CollationKey or String values, got: "
                        + (item == null ? "null" : item.getClass().getName()));
    }

    /**
     * Tells whether two values collate as equal. This is an overload, not an override
     * of {@link Object#equals(Object)}.
     *
     * @throws IllegalArgumentException under the same conditions as
     *         {@link #compare(Object, Object)}
     */
    public boolean equals(Object c1, Object c2) {
        return this.compare(c1, c2) == 0;
    }

    /**
     * Demo: writes the sorted characters of a sample sentence to {@code cjk.txt}, then
     * prints three sample names in collation order.
     */
    public static void main(String[] args) throws java.lang.Exception {
        CharSort chSort = new CharSort();
        // The literals below were mis-decoded (GB2312 bytes shown as Latin-1) in the
        // published listing; they are restored here. The first one was partly
        // corrupted, so its exact wording is a best-effort reconstruction.
        String str = " 我觉得我完成这个没用到什么工具";
        chSort.doSort(str);

        String[] arr = {"章三", "里斯", "王五"};
        // compare() accepts Strings, so sorting a String[] no longer throws.
        java.util.Arrays.sort(arr, chSort);
        for (int i = 0; i < arr.length; i++) {
            System.out.println(arr[i]);
        }
    }
}
|