import java.io.*;
import java.net.*;
import java.nio.charset.StandardCharsets;
import java.util.regex.*;
/**
 * Minimal web-crawler example: downloads a single page and records the
 * targets of its {@code <img>} and {@code <a>} tags.
 *
 * <p>Every match is echoed to standard output and appended to
 * {@value #OUTPUT_FILE} with an {@code IMG} or {@code URL} prefix, one entry
 * per line (CRLF-terminated).
 */
public class asd {

    /** Page fetched by {@link #main(String[])}. */
    private static final String START_URL = "http://www.naver.com";

    /** Result file, resolved against the current working directory. */
    private static final String OUTPUT_FILE = "text.txt";

    /** Prefix written in front of every image source. */
    private static final String IMAGE_PREFIX = "IMG";

    /** Prefix written in front of every link target. */
    private static final String LINK_PREFIX = "URL";

    /** Captures the {@code src} value of an {@code <img>} tag in group 1. */
    private static final Pattern IMAGE_PATTERN =
            Pattern.compile("<img[^>]*src=[\"']([^>\"']+)[\"']?[^>]*>");

    /**
     * Captures the {@code href} value of an {@code <a>} tag in group 1.
     *
     * <p>The value ends at the first {@code "} or {@code >}. The terminating
     * character class deliberately contains no {@code |}: inside {@code [...]}
     * a pipe is a literal character, not alternation, and would cut off any
     * link that contains one.
     */
    private static final Pattern LINK_PATTERN =
            Pattern.compile("<a\\s+href\\s*=\\s*\"?(.*?)[\">]");

    /**
     * Fetches {@value #START_URL} and writes the extracted image sources and
     * link targets to {@value #OUTPUT_FILE}.
     *
     * <p>I/O failures are reported on standard error; they do not propagate.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        try {
            URLConnection connection = new URL(START_URL).openConnection();
            // try-with-resources closes both streams even when reading or
            // writing fails half-way through. The network stream is opened
            // first so a failed download does not truncate an existing file.
            // NOTE(review): the page is assumed to be UTF-8; the charset from
            // the Content-Type header is not consulted.
            try (BufferedReader page = new BufferedReader(new InputStreamReader(
                         connection.getInputStream(), StandardCharsets.UTF_8));
                 BufferedWriter results = new BufferedWriter(new OutputStreamWriter(
                         new FileOutputStream(OUTPUT_FILE), StandardCharsets.UTF_8))) {
                extractLinks(page, results);
            }
        } catch (IOException e) {
            // No logging framework is used in this file, so report on stderr.
            System.err.println("Failed to crawl " + START_URL + ": " + e.getMessage());
            e.printStackTrace();
        }
    }

    /**
     * Scans {@code in} line by line and writes every image source and link
     * target found to {@code out}.
     *
     * <p>For each line, all image matches are emitted before all link matches.
     * Tags that span several lines are not detected because matching is done
     * per line. Package-private so it can be exercised without network access.
     *
     * @param in  HTML source to scan; not closed by this method
     * @param out destination for the prefixed entries; not closed by this method
     * @throws IOException if reading from {@code in} or writing to {@code out} fails
     */
    static void extractLinks(BufferedReader in, Writer out) throws IOException {
        String line;
        while ((line = in.readLine()) != null) {
            writeMatches(IMAGE_PATTERN.matcher(line), IMAGE_PREFIX, out);
            writeMatches(LINK_PATTERN.matcher(line), LINK_PREFIX, out);
        }
    }

    /**
     * Emits group 1 of every remaining match: the raw value to standard
     * output and {@code prefix + value + CRLF} to {@code out}.
     */
    private static void writeMatches(Matcher matcher, String prefix, Writer out)
            throws IOException {
        while (matcher.find()) {
            String target = matcher.group(1);
            System.out.println(target);
            out.write(prefix + target + "\r\n");
        }
    }
}
'프로젝트' 카테고리의 다른 글
Cygwin 설치 2편 (0) | 2014.09.18 |
---|---|
Cygwin 설치 1편 (0) | 2014.09.18 |
루씬 다운로드 1일차 (0) | 2014.09.16 |
정규표현식 펌 (0) | 2014.07.22 |
자바 웹크롤러 - 웹페이지 긁어오기 (0) | 2014.07.07 |