-
Notifications
You must be signed in to change notification settings - Fork 6
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
2014-04-10代码游戏 #30
Comments
var cheerio = require('cheerio');
var request = require('request');
request('http://www.baidu.com', function (error, response, html) {
if (!error && response.statusCode == 200) {
$ = cheerio.load(html);
console.log($('p[id=lg]').children('img').attr('src'));
}
}); |
var http = require("http");
var cheerio = require("cheerio");
/**
 * Download `url` with `http.get` and hand the complete response body to
 * `callback`.
 *
 * @param {string} url - Address passed straight to `http.get`.
 * @param {function(?string, Error=)} callback - Invoked exactly once:
 *   `callback(body)` when the response ends, or `callback(null, err)` when the
 *   request or the response stream emits an error.
 */
function download(url, callback) {
  var settled = false;

  // Report a failure once; later events after completion are ignored.
  function fail(err) {
    if (settled) {
      return;
    }
    settled = true;
    callback(null, err);
  }

  http.get(url, function(res) {
    var data = "";
    // Decode as UTF-8 at the stream level so a multi-byte character that is
    // split across two chunks is reassembled instead of being corrupted by
    // per-chunk Buffer-to-string conversion.
    res.setEncoding("utf8");
    res.on("data", function(chunk) {
      data += chunk;
    });
    res.on("end", function() {
      if (settled) {
        return;
      }
      settled = true;
      callback(data);
    });
    res.on("error", fail);
  }).on("error", fail);
}
var url = "http://www.baidu.com";
download(url, function(data) {
if (data) {
//console.log(data);
var $ = cheerio.load(data);
console.log($("#lg > img"));
result = $("#lg > img").attr("src");
console.log(result);
}else{
console.log("error");
}
}); |
赞这个! $("#lg > img").attr("src") |
var request = require("request");
var cheerio = require("cheerio");
request({
uri: "http://www.baidu.com",
}, function(error, response, body) {
var $ = cheerio.load(body);
console.log($("#lg > img").attr("src"));
}); |
var cheerio = require("cheerio");
var http = require('http');
var url = "http://www.baidu.com";
http.get(url, function(res) {
var source = "";
res.on('data', function(data) {
source += data;
});
res.on('end', function() {
var $ = cheerio.load(source);
$('#lg > img').each(function(i, e) {
console.log($(e).attr("src"));
});
console.log("done");
}) .on('error', function() {
console.log("获取数据出现错误");
});
}); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
想必大家在生活工作中都会看很多的新闻、博文消息,同时也会被各个网站的广告所困扰。使用rss肯定是一个很好的选择,但是如果网站不提供rss订阅怎么办?
good,今天就请大家完成一个网页抓取的小工具,不是简单的抓取网页哦。而是可以把抓取到的数据直接转成有结构的数据,如DOM结构(这样就可以直接构建网页了)。当然可以忽略上述要求,只追求抓取网页,不过要完成以下全部功能哦(嘿嘿)
1、node.js月,不解释
2、抓取 http://www.baidu.com 页面信息,输出数据
3、第二步输出的数据简直就是一坨坨的,肯定要筛选,请大家输出百度首页logo图片的地址吧。正确答案是 http://www.baidu.com/img/bdlogo.gif ,不要直接console.log来作弊哦。会准备测试用例的。(如果用了DOM结构取数据,还会需要一丝丝XPath功底)
The text was updated successfully, but these errors were encountered: