- 浏览: 15001 次
- 性别:
- 来自: 上海
最新评论
中文简体转化繁体中文
- 博客分类:
- java
/**
 * Catalogue of the text encodings handled by the converter.
 *
 * <p>Every encoding is identified by a small integer id. The id doubles as the
 * index into three parallel name tables: {@link #javaname} (charset name passed
 * to the Java I/O classes), {@link #htmlname} (value for the charset parameter
 * of an HTML meta tag) and {@link #nicename} (label for human viewing).
 *
 * <p>The ids are compile-time constants. The name tables are filled once in the
 * static initializer; their references are final, but the array elements
 * themselves remain writable, so callers should treat them as read-only.
 */
public class Encoding {
    // Supported Encoding Types
    public static final int GB2312 = 0;
    public static final int GBK = 1;
    public static final int HZ = 2;
    public static final int BIG5 = 3;
    public static final int CNS11643 = 4;
    public static final int UTF8 = 5;
    public static final int UNICODE = 6;
    public static final int UNICODET = 7;
    public static final int UNICODES = 8;
    public static final int ISO2022CN = 9;
    public static final int ISO2022CN_CNS = 10;
    public static final int ISO2022CN_GB = 11;
    public static final int ASCII = 12;
    public static final int OTHER = 13;
    // Number of encoding ids above; also the length of each name table
    public static final int TOTALTYPES = 14;

    // Names of the encodings as understood by Java
    public static final String[] javaname = new String[TOTALTYPES];
    // Names of the encodings for human viewing
    public static final String[] nicename = new String[TOTALTYPES];
    // Names of charsets as used in charset parameter of HTML Meta tag
    public static final String[] htmlname = new String[TOTALTYPES];

    static {
        // Assign Java charset names
        javaname[GB2312] = "GB2312";
        javaname[HZ] = "ASCII"; // What to put here? Sun doesn't support HZ
        javaname[GBK] = "GBK";
        javaname[ISO2022CN_GB] = "ISO2022CN_GB";
        javaname[BIG5] = "BIG5";
        javaname[CNS11643] = "EUC-TW";
        javaname[ISO2022CN_CNS] = "ISO2022CN_CNS";
        javaname[ISO2022CN] = "ISO2022CN";
        javaname[UTF8] = "UTF8";
        javaname[UNICODE] = "Unicode";
        javaname[UNICODET] = "Unicode";
        javaname[UNICODES] = "Unicode";
        javaname[ASCII] = "ASCII";
        javaname[OTHER] = "ISO8859_1";

        // Assign HTML charset names
        htmlname[GB2312] = "GB2312";
        htmlname[HZ] = "HZ-GB-2312";
        htmlname[GBK] = "GB2312";
        htmlname[ISO2022CN_GB] = "ISO-2022-CN-EXT";
        htmlname[BIG5] = "BIG5";
        htmlname[CNS11643] = "EUC-TW";
        htmlname[ISO2022CN_CNS] = "ISO-2022-CN-EXT";
        htmlname[ISO2022CN] = "ISO-2022-CN";
        htmlname[UTF8] = "UTF-8";
        htmlname[UNICODE] = "UTF-16";
        htmlname[UNICODET] = "UTF-16";
        htmlname[UNICODES] = "UTF-16";
        htmlname[ASCII] = "ASCII";
        htmlname[OTHER] = "ISO8859-1";

        // Assign Human readable names
        nicename[GB2312] = "GB-2312";
        nicename[HZ] = "HZ";
        nicename[GBK] = "GBK";
        nicename[ISO2022CN_GB] = "ISO2022CN-GB";
        nicename[BIG5] = "Big5";
        nicename[CNS11643] = "CNS11643";
        nicename[ISO2022CN_CNS] = "ISO2022CN-CNS";
        nicename[ISO2022CN] = "ISO2022 CN";
        nicename[UTF8] = "UTF-8";
        nicename[UNICODE] = "Unicode";
        nicename[UNICODET] = "Unicode (Trad)";
        nicename[UNICODES] = "Unicode (Simp)";
        nicename[ASCII] = "ASCII";
        nicename[OTHER] = "OTHER";
    }
}
==========================================================================
/**
 * Converts text between Simplified and Traditional Chinese, and between the
 * encodings enumerated in {@link Encoding}.
 *
 * <p>Character equivalences are loaded once, when the class is initialized,
 * from the class-path resource {@code hcutf8.txt} (read as UTF-8, resolved
 * relative to this class). Each data line holds a simplified character
 * followed by one or more traditional equivalents; empty lines and lines
 * starting with {@code #} are ignored. If the resource cannot be loaded the
 * tables stay empty and conversions pass characters through unchanged.
 *
 * <p>Lookups are done one UTF-16 code unit at a time, so characters outside
 * the Basic Multilingual Plane are never mapped and are copied as-is.
 */
public class EncodingTranslate extends Encoding {
    // Simplified/Traditional character equivalence hashes
    static protected Hashtable<String, String> s2thash, t2shash;

    static {
        // Initialize and load in the simplified/traditional character hashes
        s2thash = new Hashtable<String, String>();
        t2shash = new Hashtable<String, String>();
        loadMappingTable("hcutf8.txt");
    }

    /**
     * Fills {@link #s2thash} and {@link #t2shash} from the named resource.
     * Failures are reported on {@code System.err} and leave whatever was
     * loaded so far in place.
     */
    private static void loadMappingTable(String resourceName) {
        InputStream pydata = EncodingTranslate.class.getResourceAsStream(resourceName);
        if (pydata == null) {
            System.err.println("EncodingTranslate: mapping resource not found: " + resourceName);
            return;
        }
        BufferedReader in = null;
        try {
            in = new BufferedReader(new InputStreamReader(pydata, "UTF8"));
            String dataline;
            while ((dataline = in.readLine()) != null) {
                // Skip empty and commented lines. A line with a single character
                // carries no mapping; skipping it keeps one malformed line from
                // aborting the load of the whole table.
                if (dataline.length() < 2 || dataline.charAt(0) == '#') {
                    continue;
                }
                String simplified = dataline.substring(0, 1);
                // Simplified to Traditional, (one to many, but pick only one)
                s2thash.put(simplified, dataline.substring(1, 2));
                // Traditional to Simplified, (many to one)
                for (int i = 1; i < dataline.length(); i++) {
                    t2shash.put(dataline.substring(i, i + 1), simplified);
                }
            }
        } catch (Exception e) {
            System.err.println(e);
        } finally {
            closeQuietly(in);
            closeQuietly(pydata);
        }
    }

    /** Closes {@code resource} if it is non-null, ignoring any failure. */
    private static void closeQuietly(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (Exception ignored) {
            // Nothing useful can be done about a failed close.
        }
    }

    /**
     * Picks the lookup table for a source/target pair.
     *
     * @return {@link #s2thash} for a simplified-capable source with a traditional
     *         target, {@link #t2shash} for a traditional-capable source with a
     *         simplified target, or {@code null} when no character mapping applies
     */
    private static Hashtable<String, String> selectTable(int source_encoding,
            int target_encoding) {
        boolean simplifiedSource = source_encoding == GB2312 || source_encoding == GBK
                || source_encoding == ISO2022CN_GB || source_encoding == HZ
                || source_encoding == UNICODE || source_encoding == UNICODES
                || source_encoding == UTF8;
        boolean traditionalTarget = target_encoding == BIG5 || target_encoding == CNS11643
                || target_encoding == UNICODET || target_encoding == ISO2022CN_CNS;
        if (simplifiedSource && traditionalTarget) {
            return s2thash;
        }
        boolean traditionalSource = source_encoding == BIG5 || source_encoding == CNS11643
                || source_encoding == UNICODET || source_encoding == UTF8
                || source_encoding == ISO2022CN_CNS || source_encoding == GBK
                || source_encoding == UNICODE;
        boolean simplifiedTarget = target_encoding == GB2312 || target_encoding == UNICODES
                || target_encoding == ISO2022CN_GB || target_encoding == HZ;
        if (traditionalSource && simplifiedTarget) {
            return t2shash;
        }
        return null;
    }

    /**
     * Converts one string between simplified and traditional characters.
     *
     * <p>If the source is {@code HZ} the input is first decoded with
     * {@link #hz2gb(String)}; if the target is {@code HZ} the result is encoded
     * with {@link #gb2hz(String)}. Characters without an entry in the selected
     * table are copied unchanged, as is the whole string when the encoding pair
     * implies no simplified/traditional change.
     *
     * @param dataline        text to convert; {@code null} yields {@code null}
     * @param source_encoding one of the {@link Encoding} ids describing the input
     * @param target_encoding one of the {@link Encoding} ids describing the output
     * @return the converted text
     */
    public static String convertString(String dataline, int source_encoding,
            int target_encoding) {
        if (dataline == null) {
            return null;
        }
        if (source_encoding == HZ) {
            dataline = hz2gb(dataline);
        }
        // The table depends only on the encoding pair, so choose it once
        // instead of re-evaluating the conditions for every character.
        Hashtable<String, String> table = selectTable(source_encoding, target_encoding);
        String converted = dataline;
        if (table != null) {
            StringBuilder outline = new StringBuilder(dataline.length());
            for (int lineindex = 0; lineindex < dataline.length(); lineindex++) {
                String original = dataline.substring(lineindex, lineindex + 1);
                String mapped = table.get(original);
                outline.append(mapped != null ? mapped : original);
            }
            converted = outline.toString();
        }
        if (target_encoding == HZ) {
            // Convert to look like HZ
            return gb2hz(converted);
        }
        return converted;
    }

    /**
     * Decodes HZ escape sequences into the equivalent Unicode text.
     *
     * <p>{@code ~{} starts a run of two-byte GB characters, which ends at
     * {@code ~}} or at a line break; {@code ~~} stands for a single
     * {@code ~}. A {@code ~} followed by anything else, or at the very end of
     * the input, is copied literally. A dangling single byte at the end of a
     * GB run is also copied literally.
     *
     * <p>The input is turned into bytes via ISO 8859-1, so it is expected to be
     * 7-bit HZ text.
     *
     * @param hzstring HZ-encoded text
     * @return the decoded text, or {@code hzstring} itself if the byte
     *         conversion fails
     */
    public static String hz2gb(String hzstring) {
        byte[] hzbytes;
        try {
            hzbytes = hzstring.getBytes("8859_1");
        } catch (Exception usee) {
            System.err.println("Exception " + usee.toString());
            return hzstring;
        }
        StringBuilder gbstring = new StringBuilder(hzbytes.length);
        byte[] gbchar = new byte[2];
        int byteindex = 0;
        // Convert to look like equivalent Unicode of GB
        while (byteindex < hzbytes.length) {
            byte current = hzbytes[byteindex];
            boolean hasNext = byteindex + 1 < hzbytes.length;
            if (current != 0x7e || !hasNext) {
                // Ordinary byte, or a '~' with nothing after it. Mask so that a
                // byte >= 0x80 is not sign-extended into a 0xFFxx char.
                gbstring.append((char) (current & 0xff));
                byteindex++;
                continue;
            }
            byte next = hzbytes[byteindex + 1];
            if (next == 0x7e) { // ~~ becomes ~ (consume both tildes)
                gbstring.append('~');
                byteindex += 2;
                continue;
            }
            if (next != 0x7b) { // false alarm
                gbstring.append('~');
                byteindex++;
                continue;
            }
            // "~{" : decode byte pairs until "~}", a line break, or end of input
            byteindex += 2;
            while (byteindex < hzbytes.length) {
                byte first = hzbytes[byteindex];
                boolean pairAvailable = byteindex + 1 < hzbytes.length;
                if (first == 0x7e && pairAvailable && hzbytes[byteindex + 1] == 0x7d) {
                    byteindex += 2;
                    break;
                }
                if (first == 0x0a || first == 0x0d) {
                    gbstring.append((char) first);
                    byteindex++;
                    break;
                }
                if (!pairAvailable) {
                    // Truncated pair: keep the byte rather than read past the end
                    gbstring.append((char) (first & 0xff));
                    byteindex++;
                    break;
                }
                // Setting the high bit turns the 7-bit HZ pair into GB2312 bytes
                gbchar[0] = (byte) (first + 0x80);
                gbchar[1] = (byte) (hzbytes[byteindex + 1] + 0x80);
                try {
                    gbstring.append(new String(gbchar, "GB2312"));
                } catch (Exception usee) {
                    System.err.println("Exception " + usee.toString());
                }
                byteindex += 2;
            }
        }
        return gbstring.toString();
    }

    /**
     * Encodes text as HZ.
     *
     * <p>The text is converted to GB2312 bytes; each run of two-byte characters
     * is wrapped in {@code ~{ ... ~}} with the high bit of every byte cleared,
     * and every single-byte {@code ~} is written as {@code ~~}, including one
     * that directly follows a two-byte run.
     *
     * @param gbstring text to encode
     * @return the HZ text, or {@code gbstring} itself if the byte conversion fails
     */
    public static String gb2hz(String gbstring) {
        byte[] gbbytes;
        try {
            gbbytes = gbstring.getBytes("GB2312");
        } catch (Exception usee) {
            System.err.println(usee.toString());
            return gbstring;
        }
        StringBuilder hzbuffer = new StringBuilder(gbbytes.length + 8);
        boolean inGbRun = false;
        int i = 0;
        while (i < gbbytes.length) {
            byte current = gbbytes[i];
            if (current < 0 && i + 1 < gbbytes.length) {
                if (!inGbRun) {
                    hzbuffer.append("~{");
                    inGbRun = true;
                }
                // Adding 0x80 to a negative (high-bit) byte clears the high bit
                hzbuffer.append((char) (current + 0x80));
                hzbuffer.append((char) (gbbytes[i + 1] + 0x80));
                i += 2;
                continue;
            }
            if (inGbRun) {
                hzbuffer.append("~}");
                inGbRun = false;
            }
            if (current < 0) {
                // Lone high byte with no partner: not representable in HZ
                hzbuffer.append('?');
            } else if (current == 0x7e) {
                hzbuffer.append("~~");
            } else {
                hzbuffer.append((char) current);
            }
            i++;
        }
        if (inGbRun) {
            hzbuffer.append("~}");
        }
        return hzbuffer.toString();
    }

    /**
     * Converts a text file line by line with
     * {@link #convertString(String, int, int)}.
     *
     * <p>The files are opened with the charset names in {@code javaname} for the
     * two encodings. Every output line is terminated with the platform line
     * separator. Any failure is printed to {@code System.err}; both files are
     * closed whether or not the conversion succeeds.
     *
     * @param sourcefile      path of the file to read
     * @param outfile         path of the file to write (overwritten)
     * @param source_encoding {@link Encoding} id of the input file
     * @param target_encoding {@link Encoding} id of the output file
     */
    public static void convertFile(String sourcefile, String outfile,
            int source_encoding, int target_encoding) {
        BufferedReader srcbuffer = null;
        BufferedWriter outbuffer = null;
        try {
            srcbuffer = new BufferedReader(new InputStreamReader(
                    new FileInputStream(sourcefile), javaname[source_encoding]));
            outbuffer = new BufferedWriter(new OutputStreamWriter(
                    new FileOutputStream(outfile), javaname[target_encoding]));
            String dataline;
            while ((dataline = srcbuffer.readLine()) != null) {
                outbuffer.write(convertString(dataline, source_encoding,
                        target_encoding));
                outbuffer.newLine();
            }
            // Close here so that a failing final flush is reported below
            outbuffer.close();
        } catch (Exception ex) {
            System.err.println(ex);
        } finally {
            closeQuietly(srcbuffer);
            closeQuietly(outbuffer);
        }
    }

    /**
     * Maps a command-line encoding letter to its {@link Encoding} id.
     *
     * @return the encoding id, or {@code -1} for an unknown letter
     */
    private static int encodingForFlag(char codetype) {
        switch (codetype) {
        case 'g':
            return GB2312;
        case 'h':
            return HZ;
        case 'b':
            return BIG5;
        case 'c':
            return CNS11643;
        case '8':
            return UTF8;
        case 'u':
            return UNICODE;
        case 't':
            return UNICODET;
        case 's':
            return UNICODES;
        case 'i':
            return ISO2022CN;
        case '2':
            return ISO2022CN_GB;
        case 'n':
            return ISO2022CN_CNS;
        case 'k':
            return GBK;
        default:
            return -1;
        }
    }

    /** Prints the command-line synopsis to {@code out}. */
    private static void printUsage(java.io.PrintStream out) {
        out.println("Usage: java EncodingTranslate -<source><target> in_file out_file");
        out.println("  encoding letters: g=GB2312 h=HZ b=Big5 c=CNS11643 8=UTF-8 u=Unicode");
        out.println("                    t=Unicode(Trad) s=Unicode(Simp) i=ISO2022CN");
        out.println("                    2=ISO2022CN-GB n=ISO2022CN-CNS k=GBK");
    }

    /**
     * Command-line entry point: {@code -<source><target> in_file out_file}.
     *
     * <p>The second and third characters of the first argument select the
     * source and target encodings. With fewer than three arguments, or a first
     * argument shorter than three characters, the usage text is printed; the
     * exit status is 0 when the first argument's second character is
     * {@code h} (help request) and 1 otherwise. With a complete command line
     * {@code h} selects HZ. An unknown encoding letter is reported and exits
     * with status 1.
     */
    public static void main(String argc[]) {
        String flags = argc.length > 0
                ? argc[0].toLowerCase(java.util.Locale.ENGLISH) : "";
        if (argc.length < 3 || flags.length() < 3) {
            boolean helpRequested = flags.length() >= 2 && flags.charAt(1) == 'h';
            printUsage(helpRequested ? System.out : System.err);
            System.exit(helpRequested ? 0 : 1);
        }
        // Determine source and target encodings, store in codetypes
        int[] codetypes = new int[2];
        for (int i = 0; i < 2; i++) {
            char codetype = flags.charAt(i + 1);
            codetypes[i] = encodingForFlag(codetype);
            if (codetypes[i] < 0) {
                System.err.println("Unknown encoding letter: " + codetype);
                printUsage(System.err);
                System.exit(1);
            }
        }
        // Call the file convert function with appropriate arguments
        EncodingTranslate.convertFile(argc[1], argc[2], codetypes[0], codetypes[1]);
    }
}
/**
 * Test method. Note: the attached data file must be placed in the same
 * directory.
 */
public static void main(String[] args) {
    // Convert to Traditional Chinese
    System.out.println(EncodingTranslate.convertString("测试数据安定发生地发生法撒旦法",Encoding.GBK,Encoding.BIG5));
}
- hcutf8.zip (30.5 KB)
- 下载次数: 0
发表评论
-
一个基于RSA算法的Java数字签名例子
2014-09-12 11:52 1580一、前言: 网 ... -
java中使用公钥加密私钥解密原理实现license控制
2014-09-12 11:39 1505java中使用公钥加密私钥解密原理实现license控制现在很 ... -
构建高性能web站点
2014-09-09 14:53 636起因 大概花了一个月不到的时间,看完了这本400页不到的 ... -
作为软件工程师,你必须知道的20个常识
2014-09-09 14:27 591作为一名优秀的软件开发工程师,以下的这些常识你知道吗?在实 ... -
Log4j入门
2014-09-09 10:06 720Log4j是Apache的一个开放源代码项目,通过使用Lo ... -
log4j 详解
2014-09-09 10:06 543Log4J的配置文件(Configuration Fi ... -
使用Dom4j操作XML
2014-09-05 15:36 541Dom4j也可以很方便完成XML文档的创建、元素的修改、文档 ... -
Spring集成ActiveMQ配置
2014-09-05 15:30 7581. 集成环境 Spring采用2.5.6版本 ... -
用maven构建项目
2014-09-05 15:28 3491. 安装m2eclipse插件 要用Eclipse构 ... -
apache common 工具(怎样可以编写更少的代码)
2014-09-05 15:27 764common-lang (2.1) ... -
apache common简介
2014-09-05 15:24 667Apache Commons包含了很多开源的工具,用于解 ... -
equals 与 ==
2012-02-27 10:07 583java中equals和==的区别 值类型是存储在 ...
相关推荐
php 中文 简体繁体 互转
VB实现汉字简体、繁体互相转换 VB简体 繁体 互相转换 API实现 可以实现一整个字符串一次性转换,字符串中可包含非汉字字符。
易语言中文简体繁体转换源码,中文简体繁体转换,简体到繁体,繁体到简体
javascript 汉字简体繁体转换,测试通过。
一款实用的转换工具,可以在简体与繁体之间互相转换,操作方便,实用。
可以用scala ,也可以用java,我主要的目的是用来判断汉字是否为繁体,
繁体中文与简体中文之间互相转换
使用SQL server 把简体中文转换繁体的过程
java工具类,中文简体转繁体,ZHConverter,可以对文件夹内的所有文件整体转换。
汉字的简体和繁体之间的转换,并且可以批量文件进行转换.绝对可以满足个人的文字转换需求.
繁体中文 这个是每个页面中显示进行简繁转换的链接,您可以放在页面顶部,如顶部的导航处。 如果您用的是默认的动易模板,则进入网站后台,依次点击“系统设置”->“自定义标签管理”->“2006海蓝”->“{$MY_...
中文繁体简体转换的依赖文件, 解压后与程序代码放在同一目录即可. 代码与博文中的一致, 可以复制粘贴过去使用.
易语言中文简体繁体转换.rar 易语言中文简体繁体转换.rar 易语言中文简体繁体转换.rar 易语言中文简体繁体转换.rar 易语言中文简体繁体转换.rar 易语言中文简体繁体转换.rar
非常方便好用的jar,网上好多关于简体繁体转换的都是狗屁(我之前各种被坑)
VC++中文繁体、简体(BIG-GBK)编码转换程序
Microsoft.International.Converters.TraditionalChineseToSimplifiedConverter 命名空间包含繁体中文和简体中文转换的工具。
JavaScript实现网页中文简体繁体转换