Go语言视频零基础入门到精通

javalearner · 发表于 2014-10-1 04:52:05

/*
  * Copyright (c) 2002-2003 Che, Dong Email: chedongATbigfoot.com/chedongATchedong.com
  * $Id: HelloUnicode.java,v 1.3 2003/03/09 08:41:46 chedong Exp $
  */




  import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;

/**
 * Demonstrates how different character encodings and decodings affect
 * multi-byte (Chinese) text.
 *
 * <p>Input: an optional test string passed as the first command-line argument.
 *
 * <p>Output:
 * <ul>
 *   <li>Test 1: re-decodes the string in several ways, dumps every char and
 *       writes each variant to a file.</li>
 *   <li>Test 2: reads those files back, optionally re-decodes the content and
 *       dumps every char again.</li>
 * </ul>
 *
 * <p>All file I/O and the argument-less {@code getBytes()} /
 * {@code new String(byte[])} calls deliberately use the platform default
 * charset, because observing its effect is the purpose of the experiment.
 *
 * @author Che, Dong
 */
class HelloUnicode {
   /**
    * Program entry point: runs test 1 (write) followed by test 2 (read).
    *
    * <p>Any exception raised while running the tests is printed to standard
    * output and ends the run.
    *
    * @param args command line arguments; {@code args[0]}, when present,
    *             replaces the built-in sample string
    */
   public static void main(String[] args) {
      // Sample text "Hello world " followed by four Chinese characters. They are
      // written as Unicode escapes so the literal does not depend on the
      // encoding javac assumes for this source file.
      String hello = "Hello world \u4E16\u754C\u4F60\u597D";

      // A command-line argument overrides the built-in sample text.
      if (args.length > 0) {
         hello = args[0];
      }

      try {
         /*
          * Test 1: start from the test string as decoded by the system
          * default encoding and write it to files.
          */
         System.out.println(">>>>testing1: write hello world to files<<<<");
         System.out.println("[test 1-1]: with system default encoding="
               + System.getProperty("file.encoding") + "\nstring=" + hello
               + " length=" + hello.length());
         printCharArray(hello);
         // The name must match, including case, the one read back in test 2-1.
         writeFile("hello.orig.html", hello);

         // Encode with the platform default charset, then decode as GB2312.
         hello = new String(hello.getBytes(), "GB2312");
         System.out.println(
               "[test 1-2]: getBytes with platform default encoding and decoding as gb2312:\nstring="
               + hello + " length=" + hello.length());
         writeFile("hello.gb2312.html", hello);
         printCharArray(hello);

         // Encode as UTF-8, then decode those bytes with the platform default
         // charset; on a non-UTF-8 platform this intentionally yields mojibake.
         hello = new String(hello.getBytes("UTF8"));
         System.out.println("[test 1-3]: convert string to UTF8\nstring="
               + hello + " length=" + hello.length());
         writeFile("hello.utf8.html", hello);
         printCharArray(hello);

         /*
          * Test 2: read the files produced by test 1 and decode them in
          * different ways.
          */
         System.out.println(
               ">>>>testing2: reading and decoding from files<<<<");

         // First file: read with the system default encoding.
         hello = readFile("hello.orig.html");
         System.out.println(
               "[test 2-1]: read hello.orig.html: decoding with system default encoding\nstring="
               + hello + " length=" + hello.length());
         printCharArray(hello);

         // Second file: read with the system default encoding; no further
         // re-decoding is applied to the content.
         hello = readFile("hello.gb2312.html");
         System.out.println(
               "[test 2-2]: read hello.gb2312.html: decoding as GB2312\nstring="
               + hello + " length=" + hello.length());
         printCharArray(hello);

         // Third file: re-encode with the platform default charset and decode
         // the resulting bytes as UTF-8, undoing the conversion of test 1-3.
         hello = readFile("hello.utf8.html");
         hello = new String(hello.getBytes(), "UTF8");
         System.out.println(
               "[test 2-3]: read hello.utf8.html: decoding as UTF8\nstring="
               + hello + " length=" + hello.length());
         printCharArray(hello);
      } catch (Exception e) {
         // Also reached with a NullPointerException when readFile returned null.
         System.out.println(e.toString());
      }
   }

   /**
    * Prints one line per char of the given string, followed by an empty line.
    *
    * <p>Each line shows the char itself, its value narrowed to {@code byte}
    * and to {@code short} (decimal and upper-case hex) and the Unicode block
    * the char belongs to.
    *
    * @param inStr input string
    */
   public static void printCharArray(String inStr) {
      char[] myBuffer = inStr.toCharArray();

      for (int i = 0; i < myBuffer.length; i++) {
         char current = myBuffer[i];
         // Narrowing keeps only the low 8 / 16 bits of the UTF-16 code unit.
         byte b = (byte) current;
         short s = (short) current;
         // Negative values are sign-extended to int first, so their hex form
         // has eight digits (for example FFFFFF93).
         String hexB = Integer.toHexString(b).toUpperCase();
         String hexS = Integer.toHexString(s).toUpperCase();
         StringBuffer sb = new StringBuffer();

         // The char itself, in double quotes.
         sb.append("char[");
         sb.append(i);
         sb.append("]=\"");
         sb.append(current);
         sb.append("\" ");

         // Byte value; the literal emits a backslash followed by 'u'.
         sb.append("byte=");
         sb.append(b);
         sb.append(" \\u");
         sb.append(hexB);
         sb.append(" ");

         // Short value.
         sb.append("short=");
         sb.append(s);
         sb.append(" \\u");
         sb.append(hexS);
         sb.append(" ");

         // Unicode block.
         sb.append(Character.UnicodeBlock.of(current));

         System.out.println(sb.toString());
      }

      System.out.println();
   }

   /**
    * Writes the content to the named file using the platform default charset,
    * replacing any previous content.
    *
    * <p>Failures are reported on standard output and are not propagated.
    *
    * @param fileName output file name
    * @param content  file content to write
    */
   private static void writeFile(String fileName, String content) {
      try {
         // Overwrite mode truncates an existing file, so no separate delete
         // step is needed and stale content can never be appended to.
         FileWriter fw = new FileWriter(fileName, false);

         try {
            fw.write(content);
         } finally {
            // Release the file handle even when the write fails.
            fw.close();
         }
      } catch (Exception e) {
         System.out.println(e.toString());
      }
   }

   /**
    * Reads the named file using the platform default charset.
    *
    * <p>Lines are concatenated without their line terminators.
    *
    * @param fileName input file name
    * @return the file content, or {@code null} when the file could not be
    *         read (the error is printed to standard output)
    */
   private static String readFile(String fileName) {
      try {
         BufferedReader fr = new BufferedReader(new FileReader(fileName));

         try {
            StringBuffer out = new StringBuffer();
            String thisLine;

            while ((thisLine = fr.readLine()) != null) {
               out.append(thisLine);
            }

            return out.toString();
         } finally {
            // Release the file handle even when reading fails.
            fr.close();
         }
      } catch (Exception e) {
         System.out.println(e.toString());
         return null;
      }
   }
}

运行结果：

C:ja>java HelloUnicode
>>>>testing1: write hello world to files<<<<
[test 1-1]: with system default encoding=GBK
string=Hello world 世界你好    length=16
char[0]="H"    byte=72 u48 short=72 u48 BASIC_LATIN
char[1]="e"    byte=101 u65 short=101 u65  BASIC_LATIN
char[2]="l"    byte=108 u6C short=108 u6C  BASIC_LATIN
char[3]="l"    byte=108 u6C short=108 u6C  BASIC_LATIN
char[4]="o"    byte=111 u6F short=111 u6F  BASIC_LATIN
char[5]=" "    byte=32 u20 short=32 u20 BASIC_LATIN
char[6]="w"    byte=119 u77 short=119 u77  BASIC_LATIN
char[7]="o"    byte=111 u6F short=111 u6F  BASIC_LATIN
char[8]="r"    byte=114 u72 short=114 u72  BASIC_LATIN
char[9]="l"    byte=108 u6C short=108 u6C  BASIC_LATIN
char[10]="d" byte=100 u64 short=100 u64  BASIC_LATIN
char[11]=" " byte=32 u20 short=32 u20 BASIC_LATIN
char[12]="世" byte=22 u16 short=19990 u4E16    CJK_UNIFIED_IDEOGRAPHS
char[13]="界" byte=76 u4C short=30028 u754C    CJK_UNIFIED_IDEOGRAPHS
char[14]="你" byte=96 u60 short=20320 u4F60    CJK_UNIFIED_IDEOGRAPHS
char[15]="好" byte=125 u7D short=22909 u597D    CJK_UNIFIED_IDEOGRAPHS

[test 1-2]: getBytes with platform default encoding and decoding as gb2312:
string=Hello world 世界你好    length=16
char[0]="H"    byte=72 u48 short=72 u48 BASIC_LATIN
char[1]="e"    byte=101 u65 short=101 u65  BASIC_LATIN
char[2]="l"    byte=108 u6C short=108 u6C  BASIC_LATIN
char[3]="l"    byte=108 u6C short=108 u6C  BASIC_LATIN
char[4]="o"    byte=111 u6F short=111 u6F  BASIC_LATIN
char[5]=" "    byte=32 u20 short=32 u20 BASIC_LATIN
char[6]="w"    byte=119 u77 short=119 u77  BASIC_LATIN
char[7]="o"    byte=111 u6F short=111 u6F  BASIC_LATIN
char[8]="r"    byte=114 u72 short=114 u72  BASIC_LATIN
char[9]="l"    byte=108 u6C short=108 u6C  BASIC_LATIN
char[10]="d" byte=100 u64 short=100 u64  BASIC_LATIN
char[11]=" " byte=32 u20 short=32 u20 BASIC_LATIN
char[12]="世" byte=22 u16 short=19990 u4E16    CJK_UNIFIED_IDEOGRAPHS
char[13]="界" byte=76 u4C short=30028 u754C    CJK_UNIFIED_IDEOGRAPHS
char[14]="你" byte=96 u60 short=20320 u4F60    CJK_UNIFIED_IDEOGRAPHS
char[15]="好" byte=125 u7D short=22909 u597D    CJK_UNIFIED_IDEOGRAPHS

[test 1-3]: convert string to UTF8
string=Hello world 涓��浣�ソ length=18
char[0]="H"    byte=72 u48 short=72 u48 BASIC_LATIN
char[1]="e"    byte=101 u65 short=101 u65  BASIC_LATIN
char[2]="l"    byte=108 u6C short=108 u6C  BASIC_LATIN
char[3]="l"    byte=108 u6C short=108 u6C  BASIC_LATIN
char[4]="o"    byte=111 u6F short=111 u6F  BASIC_LATIN
char[5]=" "    byte=32 u20 short=32 u20 BASIC_LATIN
char[6]="w"    byte=119 u77 short=119 u77  BASIC_LATIN
char[7]="o"    byte=111 u6F short=111 u6F  BASIC_LATIN
char[8]="r"    byte=114 u72 short=114 u72  BASIC_LATIN
char[9]="l"    byte=108 u6C short=108 u6C  BASIC_LATIN
char[10]="d" byte=100 u64 short=100 u64  BASIC_LATIN
char[11]=" " byte=32 u20 short=32 u20 BASIC_LATIN
char[12]="涓" byte=-109 uFFFFFF93 short=28051 u6D93    CJK_UNIFIED_IDEOGRAPHS
char[13]="�" byte=43 u2B short=26667 u682B    CJK_UNIFIED_IDEOGRAPHS
char[14]="�" byte=107 u6B short=26219 u666B    CJK_UNIFIED_IDEOGRAPHS
char[15]="浣" byte=99 u63 short=28003 u6D63    CJK_UNIFIED_IDEOGRAPHS
char[16]="�" byte=-78 uFFFFFFB2    short=29362 u72B2    CJK_UNIFIED_IDEOGRAPHS
char[17]="ソ" byte=-67 uFFFFFFBD    short=12477 u30BD    KATAKANA

>>>>testing2: reading and decoding from files<<<<
[test 2-1]: read hello.orig.html: decoding with system default encoding
string=Hello world 世界你好    length=16
char[0]="H"    byte=72 u48 short=72 u48 BASIC_LATIN
char[1]="e"    byte=101 u65 short=101 u65  BASIC_LATIN
char[2]="l"    byte=108 u6C short=108 u6C  BASIC_LATIN
char[3]="l"    byte=108 u6C short=108 u6C  BASIC_LATIN
char[4]="o"    byte=111 u6F short=111 u6F  BASIC_LATIN
char[5]=" "    byte=32 u20 short=32 u20 BASIC_LATIN
char[6]="w"    byte=119 u77 short=119 u77  BASIC_LATIN
char[7]="o"    byte=111 u6F short=111 u6F  BASIC_LATIN
char[8]="r"    byte=114 u72 short=114 u72  BASIC_LATIN
char[9]="l"    byte=108 u6C short=108 u6C  BASIC_LATIN
char[10]="d" byte=100 u64 short=100 u64  BASIC_LATIN
char[11]=" " byte=32 u20 short=32 u20 BASIC_LATIN
char[12]="世" byte=22 u16 short=19990 u4E16    CJK_UNIFIED_IDEOGRAPHS
char[13]="界" byte=76 u4C short=30028 u754C    CJK_UNIFIED_IDEOGRAPHS
char[14]="你" byte=96 u60 short=20320 u4F60    CJK_UNIFIED_IDEOGRAPHS
char[15]="好" byte=125 u7D short=22909 u597D    CJK_UNIFIED_IDEOGRAPHS

[test 2-2]: read hello.gb2312.html: decoding as GB2312
string=Hello world 世界你好    length=16
char[0]="H"    byte=72 u48 short=72 u48 BASIC_LATIN
char[1]="e"    byte=101 u65 short=101 u65  BASIC_LATIN
char[2]="l"    byte=108 u6C short=108 u6C  BASIC_LATIN
char[3]="l"    byte=108 u6C short=108 u6C  BASIC_LATIN
char[4]="o"    byte=111 u6F short=111 u6F  BASIC_LATIN
char[5]=" "    byte=32 u20 short=32 u20 BASIC_LATIN
char[6]="w"    byte=119 u77 short=119 u77  BASIC_LATIN
char[7]="o"    byte=111 u6F short=111 u6F  BASIC_LATIN
char[8]="r"    byte=114 u72 short=114 u72  BASIC_LATIN
char[9]="l"    byte=108 u6C short=108 u6C  BASIC_LATIN
char[10]="d" byte=100 u64 short=100 u64  BASIC_LATIN
char[11]=" " byte=32 u20 short=32 u20 BASIC_LATIN
char[12]="世" byte=22 u16 short=19990 u4E16    CJK_UNIFIED_IDEOGRAPHS
char[13]="界" byte=76 u4C short=30028 u754C    CJK_UNIFIED_IDEOGRAPHS
char[14]="你" byte=96 u60 short=20320 u4F60    CJK_UNIFIED_IDEOGRAPHS
char[15]="好" byte=125 u7D short=22909 u597D    CJK_UNIFIED_IDEOGRAPHS

[test 2-3]: read hello.utf8.html: decoding as UTF8
string=Hello world 世界你好    length=16
char[0]="H"    byte=72 u48 short=72 u48 BASIC_LATIN
char[1]="e"    byte=101 u65 short=101 u65  BASIC_LATIN
char[2]="l"    byte=108 u6C short=108 u6C  BASIC_LATIN
char[3]="l"    byte=108 u6C short=108 u6C  BASIC_LATIN
char[4]="o"    byte=111 u6F short=111 u6F  BASIC_LATIN
char[5]=" "    byte=32 u20 short=32 u20 BASIC_LATIN
char[6]="w"    byte=119 u77 short=119 u77  BASIC_LATIN
char[7]="o"    byte=111 u6F short=111 u6F  BASIC_LATIN
char[8]="r"    byte=114 u72 short=114 u72  BASIC_LATIN
char[9]="l"    byte=108 u6C short=108 u6C  BASIC_LATIN
char[10]="d" byte=100 u64 short=100 u64  BASIC_LATIN
char[11]=" " byte=32 u20 short=32 u20 BASIC_LATIN
char[12]="世" byte=22 u16 short=19990 u4E16    CJK_UNIFIED_IDEOGRAPHS
char[13]="界" byte=76 u4C short=30028 u754C    CJK_UNIFIED_IDEOGRAPHS
char[14]="你" byte=96 u60 short=20320 u4F60    CJK_UNIFIED_IDEOGRAPHS
char[15]="好" byte=125 u7D short=22909 u597D    CJK_UNIFIED_IDEOGRAPHS












源码下载：http://file.javaxxz.com/2014/10/1/045204531.zip

		自动登录	找回密码
密码			立即注册

JAVA高级面试进阶视频教程	Java架构师系统进阶VIP课程	分布式高可用全栈开发微服务教程	Go语言视频零基础入门到精通	Java架构师3期(课件+源码)
Java开发全终端实战租房项目视频教程	SpringBoot2.X入门到高级使用教程	大数据培训第六期全套视频教程	深度学习（CNN RNN GAN）算法原理	Java亿级流量电商系统视频教程
互联网架构师视频教程	年薪50万Spark2.0从入门到精通	年薪50万！人工智能学习路线教程	年薪50万！大数据从入门到精通学习路线	年薪50万！机器学习入门到精通视频教程
仿小米商城类app和小程序视频教程	深度学习数据分析基础到实战	最新黑马javaEE2.1就业课程	从 0到JVM实战高手教程	MySQL入门到精通教程

分布式高可用全栈开发微服务教程

Go语言视频零基础入门到精通

Java开发全终端实战租房项目视频教程

SpringBoot2.X入门到高级使用教程

大数据培训第六期全套视频教程

Java亿级流量电商系统视频教程

年薪50万Spark2.0从入门到精通

年薪50万！人工智能学习路线教程

[Java基础知识]Java输入输出过程中的字节流到字符流的转换

相关帖子

浏览过的版块