Java 网络编程：使用 URL 抓取网站上的图片

Java 网络编程：使用 URL 抓取网站上的图片

一个小型的 Java 爬虫：下载网页源码，用正则表达式提取 img 标签的 src 属性，再把图片逐个保存到本地。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
package com.URL;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Minimal image scraper: downloads one HTML page, extracts the {@code src} attribute of
 * every {@code <img>} tag with a regular expression, and saves each image into a local
 * directory.
 */
public class UrlTest {

    /** Matches an {@code <img ...>} tag; group 1 is the quoted value of its src attribute. */
    private static final Pattern IMG_SRC =
            Pattern.compile("<img[^>]+src\\s*=\\s*['\"]([^'\"]+)['\"][^>]*>");

    /** Local directory the downloaded images are written to. */
    private static final String SAVE_DIR = "V:/java练习/img/";

    /**
     * Fetches the start page, prints its HTML, then prints and downloads every image it
     * references.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String url = "http://www.qq.com";
        try {
            URL page = new URL(url);
            String html = readPage(page);
            System.out.println(html);
            for (String src : extractImageSources(html)) {
                try {
                    // Resolve against the page URL so that relative ("/a.png") and
                    // protocol-relative ("//host/a.png") sources become downloadable.
                    String absolute = new URL(page, src).toString();
                    System.out.println(absolute);
                    writeimg(absolute);
                } catch (MalformedURLException e) {
                    // One unusable src (e.g. an unsupported scheme) must not stop the
                    // remaining downloads.
                    System.err.println("Skipping image with unusable src: " + src);
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Downloads the image at {@code urlimg} into the save directory, named after the last
     * segment of the URL path. Failures are reported on stderr and never propagated, so a
     * caller looping over many images keeps going.
     *
     * @param urlimg absolute URL of the image to download
     */
    public static void writeimg(String urlimg) {
        if (urlimg == null) {
            System.err.println("Skipping image: URL is null");
            return;
        }
        try {
            URL u = new URL(urlimg);
            // getPath() excludes the query string and fragment.
            String imgname = fileNameOf(u.getPath());
            if (imgname.isEmpty()) {
                System.err.println("Skipping image without a file name: " + urlimg);
                return;
            }

            File dir = new File(SAVE_DIR);
            if (!dir.isDirectory() && !dir.mkdirs()) {
                throw new IOException("Cannot create directory " + dir);
            }
            File target = new File(dir, imgname);

            URLConnection uc = u.openConnection();
            // The input stream is opened first: if the download cannot start, no empty
            // file is left behind. Both streams are closed even when copying fails.
            try (InputStream in = uc.getInputStream();
                    OutputStream out = new FileOutputStream(target)) {
                byte[] buffer = new byte[8192];
                int len;
                while ((len = in.read(buffer)) != -1) {
                    out.write(buffer, 0, len);
                }
            }
        } catch (IOException | RuntimeException e) {
            // Best effort: this method has never thrown to its callers.
            e.printStackTrace();
        }
    }

    /**
     * Reads the whole page as UTF-8 text, terminating every line with {@code '\n'}.
     *
     * @param page URL of the HTML page
     * @return the page content
     * @throws IOException if the page cannot be opened or read
     */
    static String readPage(URL page) throws IOException {
        URLConnection uc = page.openConnection();
        StringBuilder html = new StringBuilder();
        try (BufferedReader br = new BufferedReader(
                new InputStreamReader(uc.getInputStream(), StandardCharsets.UTF_8))) {
            String line;
            while ((line = br.readLine()) != null) {
                html.append(line).append('\n');
            }
        }
        return html.toString();
    }

    /**
     * Returns the raw {@code src} value of every {@code <img>} tag found in {@code html},
     * in document order. Values are not resolved or de-duplicated.
     *
     * @param html HTML text to scan; {@code null} is treated as empty
     * @return the src values, possibly empty, never {@code null}
     */
    static List<String> extractImageSources(String html) {
        List<String> sources = new ArrayList<>();
        if (html == null) {
            return sources;
        }
        Matcher m = IMG_SRC.matcher(html);
        while (m.find()) {
            sources.add(m.group(1));
        }
        return sources;
    }

    /**
     * Derives a local file name from a URL or URL path: drops any {@code ?query} or
     * {@code #fragment} and keeps what follows the last {@code '/'} or {@code '\'}.
     *
     * @param location URL or URL path
     * @return the file name, or an empty string when there is none or when it is
     *     {@code "."} or {@code ".."} (which would not name a file inside the save
     *     directory)
     */
    static String fileNameOf(String location) {
        int end = location.length();
        for (char stop : new char[] {'?', '#'}) {
            int i = location.indexOf(stop);
            if (i >= 0 && i < end) {
                end = i;
            }
        }
        String path = location.substring(0, end);
        // Backslashes are treated as separators too, so the name cannot carry a path.
        int sep = Math.max(path.lastIndexOf('/'), path.lastIndexOf('\\'));
        String name = path.substring(sep + 1);
        if (name.equals(".") || name.equals("..")) {
            return "";
        }
        return name;
    }
}


本博客所有文章除特别声明外,均采用 CC BY-SA 4.0 协议 ,转载请注明出处!