TA的每日心情 | 开心 2021-3-12 23:18 |
---|
签到天数: 2 天 [LV.1]初来乍到
|
马萨玛索(http://www.masamaso.com/index.shtml)每天10点都会推出5件一折商品,也就是秒购。男装质量还不错,所以我经常去抢,但感觉手动太慢了,就写了一个小爬虫程序,让它自己去爬:如果是金子页面(免费商品)就会自动打开,这样我就可以抢到了。和大家分享一下。
思路:
1. 把所有想要的商品的链接读到程序中。
2. 分别打开每一个链接读取源代码
3. 验证是否是金子商品(源代码中含有free_msg字符串)
4. 如果是金子就把该链接用IE打开
源代码:
读链接文件:
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.LinkedList;
import java.util.List;
/**
 * Reads a text file and exposes its content as a list of lines.
 *
 * <p>Note that this class shares its simple name with {@code java.io.FileReader}.
 *
 * @author Administrator
 */
public class FileReader {
    /** Path of the file to read; {@code null} when the no-arg constructor was used. */
    private String fileName;

    /** Creates a reader with no file configured; {@link #getLines()} then returns an empty list. */
    public FileReader() {
    }

    /**
     * Creates a reader for the given file.
     *
     * @param fileName path of the file to read
     */
    public FileReader(String fileName) {
        this.fileName = fileName;
    }

    /**
     * Reads every line of the configured file, in file order.
     *
     * <p>The file is decoded with the platform default charset. Failures are
     * reported on standard error and never propagate: a missing file or an
     * unset file name yields an empty list, and an I/O error in the middle of
     * the file yields the lines read so far.
     *
     * @return the lines read, never {@code null}
     */
    public List<String> getLines() {
        List<String> lines = new LinkedList<String>();
        if (this.fileName == null) {
            // Nothing to open; previously this ended in a NullPointerException.
            return lines;
        }

        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new InputStreamReader(new FileInputStream(this.fileName)));
            String line;
            while ((line = reader.readLine()) != null) {
                lines.add(line);
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // The reader is still null when the file could not be opened.
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        return lines;
    }
}
[/code] URL类:
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
/**
 * Wraps a URL string and offers helpers to open it and to search the content
 * it serves for a marker string.
 *
 * @author Administrator
 */
public class Url {
    /** Timeout in milliseconds, applied to both connecting and reading. */
    private static final int TIMEOUT_MILLIS = 9000;

    /** The URL text; may be {@code null} or malformed. */
    private String url;

    /** Creates an instance with no URL set. */
    public Url() {
    }

    /**
     * Creates an instance for the given URL text.
     *
     * @param url the URL to open
     */
    public Url(String url) {
        this.url = url;
    }

    /**
     * @return the URL text as supplied, possibly {@code null}
     */
    public String getUrl() {
        return url;
    }

    /**
     * @param url the URL text to use from now on
     */
    public void setUrl(String url) {
        this.url = url;
    }

    /**
     * Creates a connection object for the URL without connecting yet.
     *
     * @return the connection, or {@code null} when the URL text is missing or
     *         malformed or the connection object cannot be created (the
     *         failure is printed to standard error)
     */
    public URLConnection getConnection() {
        try {
            return new URL(url).openConnection();
        } catch (MalformedURLException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Connects to the URL and returns a reader over its input stream, decoded
     * with the platform default charset.
     *
     * <p>The caller owns the returned reader and must close it.
     *
     * @return a reader over the response body, or {@code null} when the
     *         connection cannot be established
     */
    public BufferedReader getReader() {
        URLConnection conn = getConnection();
        if (conn == null) {
            return null;
        }
        conn.setConnectTimeout(TIMEOUT_MILLIS);
        // Without a read timeout a stalled server would block the calling thread forever.
        conn.setReadTimeout(TIMEOUT_MILLIS);
        try {
            conn.connect();
            return new BufferedReader(new InputStreamReader(conn.getInputStream()));
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Reads the content behind the URL line by line and reports whether any
     * line contains {@code str}. Reading stops at the first match.
     *
     * @param str the text to look for
     * @return {@code true} if some line contains {@code str}; {@code false}
     *         if none does, if the URL cannot be opened, or if reading fails
     */
    public boolean isExist(String str) {
        BufferedReader reader = getReader();
        if (reader == null) {
            // Unreachable or invalid URL: treat as "not found" instead of failing with an NPE.
            return false;
        }
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.contains(str)) {
                    return true;
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            try {
                reader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        return false;
    }
}
[/code] Digger类:
import java.io.IOException;
import java.util.List;
/**
 * Worker thread that checks one product page for the "gold" marker and, when
 * the marker is present, opens that page in Internet Explorer.
 *
 * @author Administrator
 */
public class Digger extends Thread {
    /** URL list used when no path is passed on the command line. */
    private static final String DEFAULT_URL_FILE = "F:/myworkspace/workspace/diggold/src/url.txt";

    /** Browser executable launched for a gold page. */
    private static final String BROWSER_PATH = "C:/Program Files/Internet Explorer/iexplore.exe";

    /** Text whose presence in the page source marks a gold page. */
    private static final String GOLD_MARKER = "free_msg";

    /** Page this worker checks; {@code null} when the no-arg constructor was used. */
    private Url url;

    /** Creates a worker with no page assigned; running it does nothing. */
    public Digger() {
        super();
    }

    /**
     * Creates a worker for the given page.
     *
     * @param url the page to check
     */
    public Digger(Url url) {
        this.url = url;
    }

    /**
     * Reads the list of product links and starts one worker thread per link.
     * Blank lines are skipped and surrounding whitespace is trimmed.
     *
     * @param args optional; {@code args[0]} overrides the path of the link
     *             file, otherwise the built-in default path is used
     * @throws IOException declared for compatibility with existing callers
     */
    public static void main(String[] args) throws IOException {
        String urlFile = (args != null && args.length > 0) ? args[0] : DEFAULT_URL_FILE;
        List<String> urls = new FileReader(urlFile).getLines();
        for (String line : urls) {
            if (line == null) {
                continue;
            }
            String link = line.trim();
            if (link.length() == 0) {
                // A blank line is not a link; do not spawn a thread for it.
                continue;
            }
            new Digger(new Url(link)).start();
        }
    }

    /**
     * Checks whether the page source contains the gold marker and, if so,
     * opens the page in the browser; then prints the link followed by
     * {@code END!}.
     */
    @Override
    public void run() {
        if (url == null) {
            return;
        }
        if (url.isExist(GOLD_MARKER)) {
            try {
                // Array form: the executable path contains spaces, and the link must stay
                // a single argument; the one-string overload tokenizes on whitespace.
                Runtime.getRuntime().exec(new String[] {BROWSER_PATH, url.getUrl()});
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        System.out.println(url.getUrl() + "END!");
    }
}
[/code] url.txt配置文件
http://www.masamaso.com/goods.php?id=3128
http://www.masamaso.com/goods.php?id=3132
http://www.masamaso.com/goods.php?id=3120
[/code]
写的比较简单,但是挺实用,各位看官莫笑话哈。
源码下载:http://file.javaxxz.com/2014/10/28/235830046.rar |
|