We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.
Already on GitHub? Sign in to your account
import requests
from bs4 import BeautifulSoup


def weather(url):
    """Download the weather page at *url* and return its parsed data.

    Args:
        url: Address of the weather page to fetch.

    Returns:
        The dictionary built by ``extract_data`` from the page's HTML.
    """
    html_doc = get_raw_html(url)
    return extract_data(html_doc)


def get_raw_html(url, timeout=10):
    """Fetch *url* with browser-like headers and return the body as text.

    Args:
        url: Address to request.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``
            argument, so a stalled server cannot block the call forever.

    Returns:
        The decoded response body (``Response.text``).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: For connection failures and timeouts.
    """
    headers = {
        "Connection": "keep-alive",
        "Cache-Control": "max-age=0",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.122 Safari/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
        "Accept-Encoding": "gzip, deflate",
        "Accept-Language": "en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7",
    }
    res = requests.get(url, headers=headers, timeout=timeout)
    # Fail here on an HTTP error instead of parsing an error page below.
    res.raise_for_status()
    return res.text


def _paragraph_strings(container, css_class):
    """Return ``.string`` of every ``<p class=css_class>`` inside *container*."""
    return [tag.string for tag in container.find_all("p", class_=css_class)]


def extract_data(doc):
    """Parse the weather page HTML into current, 7-day and 24-hour data.

    Args:
        doc: HTML text of the weather page.

    Returns:
        A dict with three keys:

        * ``'current'``: dict of the values read from the ``c-left`` box
          (plus the location read from the first ``<a>`` of the
          ``n-container`` box).
        * ``'forecast7d'``: list of dicts with ``date``, ``text``, ``des``,
          ``maxTemp``, ``minTemp`` and ``wind``.
        * ``'forecast24h'``: list of dicts with ``time`` and ``temp``.

    Note:
        There is no guard for missing elements: if the page markup does not
        contain the expected tags/attributes, the lookups fail with
        ``AttributeError``, ``TypeError``, ``KeyError`` or ``IndexError``.
    """
    soup = BeautifulSoup(doc, "lxml")
    box = soup.find("div", class_="n-container")
    today_box = box.find("div", class_="c-left")
    week_box = box.find("div", class_="c-right")
    box_24h = soup.find("div", class_="cleft-24hours")

    # Current conditions.
    location = box.select_one("a")["cname"]
    temp = today_box.select_one("span.num").contents[0]
    desc = today_box.select_one("p.text").string
    date = today_box.select_one("a.date").string.strip()
    # The value is taken from the third child node of each span; presumably
    # the first two children are a label/icon -- confirm against the markup.
    wind = today_box.select_one("span.wind").contents[2]
    # "hundity" (sic) is the CSS class name used by the page.
    hundity = today_box.select_one("span.hundity").contents[2]
    aqi = today_box.select_one("span.liv-text > a > em").string
    aq = today_box.select_one("span.liv-text > a > span.liv-img").string
    updated_at = today_box.select_one("div.row4 > p").string

    # 7-day forecast.
    dates = _paragraph_strings(week_box, "date")
    texts = _paragraph_strings(week_box, "text")
    deses = _paragraph_strings(week_box, "des")
    winds = _paragraph_strings(week_box, "wind")
    # High/low temperatures are comma-separated lists stored in attributes
    # of a single element.
    temp_box = week_box.select_one("div.r-temp")
    max_temps = temp_box["data-high"].split(",")
    min_temps = temp_box["data-low"].split(",")
    # zip() stops at the shortest input, so a short list truncates the result.
    forecast7d = [
        {
            "date": day_date,
            "text": day_text,
            "des": day_des,
            "maxTemp": day_max,
            "minTemp": day_min,
            "wind": day_wind,
        }
        for day_date, day_text, day_des, day_max, day_min, day_wind in zip(
            dates, texts, deses, max_temps, min_temps, winds
        )
    ]

    # 24-hour forecast.
    times = _paragraph_strings(box_24h, "time")
    temps = _paragraph_strings(box_24h, "temp")
    forecast24h = [
        {"time": hour_time, "temp": hour_temp}
        for hour_time, hour_temp in zip(times, temps)
    ]

    return {
        "current": {
            "位置": location,  # location
            "温度": temp,  # temperature
            "天气": desc,  # weather description
            "date": date,
            "风力": wind,  # wind force
            "湿度": hundity,  # humidity
            "AQI": aqi,
            "空气质量": aq,  # air quality
            "updated_at": updated_at,
        },
        "forecast7d": forecast7d,
        "forecast24h": forecast24h,
    }


# Demo: fetch one page and print its current conditions.
data = weather("http://tianqi.sogou.com/pc/weather/2332634")
print(data['current']['date'])
print(data['current']['位置'], data['current']['天气'], data['current']['温度']+'°', data['current']['湿度'], data['current']['风力'])
print('AQI: '+data['current']['AQI'])
print('空气质量: ' + data['current']['空气质量'])
print(data['current']['updated_at'])
The text was updated successfully, but these errors were encountered:
No branches or pull requests
The text was updated successfully, but these errors were encountered: