package crawler;
import java.io.*;
import java.net.*;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
import java.util.*;
/**
 * Minimal crawler example: downloads a single web page and saves the {@code src}
 * value of every {@code <img>} tag found in it to a text file, one value per line.
 */
public class test {

    /** Address of the page whose image references are collected. */
    private static final String PAGE_URL = "http://www.naver.com";

    /** File (relative to the working directory) that receives the extracted values. */
    private static final String OUTPUT_FILE = "text11.txt";

    /**
     * Matches an {@code <img ...>} tag and captures the value of its {@code src}
     * attribute (quoted or unquoted) in group 1. Compiled once instead of per line.
     */
    private static final Pattern IMG_SRC =
            Pattern.compile("<img[^>]*src=[\"']?([^>\"']+)[\"']?[^>]*>");

    /**
     * Fetches {@link #PAGE_URL}, scans it line by line and writes every image source
     * found to {@link #OUTPUT_FILE}. I/O failures are reported on standard error.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // try-with-resources closes both streams even when reading or writing fails.
        // The reader is opened first so a failed download does not truncate the file.
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(
                     new URL(PAGE_URL).openStream(),
                     java.nio.charset.StandardCharsets.UTF_8));
             BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(
                     new FileOutputStream(OUTPUT_FILE),
                     java.nio.charset.StandardCharsets.UTF_8))) {
            String line;
            // Note: the page is scanned one line at a time, so a tag that is split
            // across several lines is not detected.
            while ((line = reader.readLine()) != null) {
                for (String src : extractImageSources(line)) {
                    writer.write(src);
                    // Separate entries; without this all values run together.
                    writer.newLine();
                }
            }
        } catch (IOException e) {
            // Covers MalformedURLException as well as network and file errors.
            e.printStackTrace();
        }
    }

    /**
     * Extracts the {@code src} attribute values of all {@code <img>} tags in the
     * given fragment of HTML.
     *
     * @param html text to scan; {@code null} is treated as empty
     * @return the captured values in order of appearance; never {@code null}
     */
    static List<String> extractImageSources(String html) {
        List<String> sources = new ArrayList<>();
        if (html == null) {
            return sources;
        }
        Matcher matcher = IMG_SRC.matcher(html);
        while (matcher.find()) {
            sources.add(matcher.group(1));
        }
        return sources;
    }
}
'프로젝트' 카테고리의 다른 글
Cygwin 설치 2편 (0) | 2014.09.18 |
---|---|
Cygwin 설치 1편 (0) | 2014.09.18 |
루씬 다운로드 1일차 (0) | 2014.09.16 |
웹크롤러 소스중 (1) | 2014.07.23 |
정규표현식 펌 (0) | 2014.07.22 |