2013年9月7日 星期六

Java 取得網頁原始碼

 /**
  * Downloads the content of a web page and returns it as a single string.
  *
  * <p>The response body is decoded as UTF-8 and read line by line; every line
  * is terminated with {@code "\r\n"} in the returned text, regardless of the
  * line separator used by the server.
  *
  * @param url the absolute HTTP address of the page to fetch
  * @return the page source, or {@code null} if the page could not be fetched
  *         (a diagnostic is printed in that case)
  */
 public  String getPage(String url) {
        try {
            // Named "target" so it does not collide with the String parameter "url".
            URL target = new URL(url);
            HttpURLConnection con = (HttpURLConnection) target.openConnection();
            // Some servers are configured to reject requests from Java clients;
            // the workaround is to send a browser-like User-Agent header.
            con.setRequestProperty("User-Agent", "Mozilla/4.0 (compatible; MSIE 8.0;Windows NT; DigExt)");
            // try-with-resources closes the reader (and the underlying response
            // stream) on both the normal and the exceptional path.
            try (BufferedReader reader = new BufferedReader(new InputStreamReader(con.getInputStream(), "utf-8"))) {
                StringBuilder b = new StringBuilder();
                String line;
                while ((line = reader.readLine()) != null) {
                    b.append(line).append("\r\n");
                }
                return b.toString();
            }
        } catch (FileNotFoundException ex) {
            // Thrown by getInputStream() when the requested resource does not exist.
            System.out.println("NOT FOUND:" + url);
        } catch (ConnectException ex) {
            // The connection to the remote host could not be established.
            System.out.println("Timeout:" + url);
        } catch (Exception ex) {
            // Best-effort fetch: report anything else (malformed URL, I/O error, ...)
            // and fall through to the null return.
            ex.printStackTrace();
        }
        return null;
    }

沒有留言:

張貼留言