From efc6fc5acd351b3eea47061ad95675e9b5b6fbe5 Mon Sep 17 00:00:00 2001
From: yanzhongsino <37318862+yanzhongsino@users.noreply.github.com>
Date: Tue, 10 Apr 2018 13:04:07 +0000
Subject: [PATCH] =?UTF-8?q?Create=20=E9=A1=B9=E7=9B=AE01=E5=95=86=E9=93=BA?=
 =?UTF-8?q?=E6=95=B0=E6=8D=AE=E5=8A=A0=E8=BD=BD=E5=8F=8A=E5=AD=98=E5=82=A8?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

创建项目01的代码
---
 ...5\275\345\217\212\345\255\230\345\202\250" | 77 +++++++++++++++++++
 1 file changed, 77 insertions(+)
 create mode 100644 "\351\241\271\347\233\25601\345\225\206\351\223\272\346\225\260\346\215\256\345\212\240\350\275\275\345\217\212\345\255\230\345\202\250"

diff --git "a/\351\241\271\347\233\25601\345\225\206\351\223\272\346\225\260\346\215\256\345\212\240\350\275\275\345\217\212\345\255\230\345\202\250" "b/\351\241\271\347\233\25601\345\225\206\351\223\272\346\225\260\346\215\256\345\212\240\350\275\275\345\217\212\345\255\230\345\202\250"
new file mode 100644
index 0000000..7a6b479
--- /dev/null
+++ "b/\351\241\271\347\233\25601\345\225\206\351\223\272\346\225\260\346\215\256\345\212\240\350\275\275\345\217\212\345\255\230\345\202\250"
@@ -0,0 +1,77 @@
+'''
+[Project 01] Shop data loading and storage
+
+Assignment requirements:
+1. Read the "商铺数据.csv" file successfully.
+2. Parse the data into a list of dicts: [{'var1': value1, 'var2': value2, ...}, ..., {}]
+3. Clean the data:
+   (1) convert the comment and price fields to numbers
+   (2) drop rows that have a missing field
+   (3) split commentlist into three fields and convert them to numbers
+4. Save the result as a .pkl file.
+'''
+
+import pickle
+import re
+
+path = 'E:/IT/网易微专业_数据分析师（python）/Python数据分析师微专业_项目资料/项目01商铺数据加载及存储/'
+
+
+def parse_line(line):
+    """Turn one CSV data line into a cleaned record dict.
+
+    Returns None for a line with missing fields, a "no value" placeholder,
+    or a numeric field that cannot be converted.
+    """
+    fields = line.split(',')
+    if len(fields) < 7:
+        return None
+    classify, name, co, star, pr, address, com = (s.strip() for s in fields[:7])
+    # Drop rows with an empty field or a "no value" placeholder.
+    missing = '' in (classify, name, address, com) or '-' in pr
+    if missing or co == '我要点评' or star == '该商户暂无星级':
+        return None
+    # The scores in commentlist are separated by a fixed run of spaces.
+    comlst = com.split('                                ')
+    try:
+        comment = int(re.sub(r'\D', '', co))
+        price = int(re.sub(r'\D', '', pr))
+        # Remove the Chinese characters around each score, keep the number.
+        quality, envir, ser = (
+            float(re.sub('[\u4e00-\u9fa5]', '', part.strip()))
+            for part in comlst[:3]
+        )
+    except ValueError:
+        # Non-numeric text, or fewer than three scores to unpack.
+        return None
+    return {
+        'name': name,
+        'classify': classify,
+        'comment': comment,
+        'star': star,
+        'price': price,
+        'quality': quality,
+        'environment': envir,
+        'service': ser,
+    }
+
+
+# m: cleaned records; n: data lines read (header excluded); j: lines kept.
+m, n, j = [], 0, 0
+with open(path + '商铺数据.csv', 'r', encoding='utf8') as f:
+    next(f, None)  # skip the header line
+    for line in f:
+        n += 1
+        record = parse_line(line)
+        if record is not None:
+            m.append(record)
+            j += 1
+with open(path + '商铺数据.pkl', 'wb') as pic:
+    pickle.dump(m, pic)
+print('数据解析完成!')
+print('一共有%i条数据' % n)
+print('有效数据有%i条' % j)
+# Read back the pickle this script just wrote to confirm it round-trips.
+with open(path + '商铺数据.pkl', 'rb') as fpic:
+    st = pickle.load(fpic)
+print(st)