# Basic layout of a Scrapy item-pipeline module (pipelines.py).
import json
# Transcoding helper: a subclass of json.JSONEncoder (the base class lives in
# encoder.py inside the standard-library json package).
class MyEncoder(json.JSONEncoder):
    """JSON encoder that serializes raw byte strings as UTF-8 text.

    The stock ``json.JSONEncoder`` rejects ``bytes`` values with a
    ``TypeError``. This subclass decodes them as UTF-8 so they can be
    written out as ordinary JSON strings.
    """

    def default(self, o):
        """Return a JSON-serializable replacement for *o*.

        Args:
            o: An object the base encoder does not know how to serialize.

        Returns:
            The UTF-8 decoded text when *o* is ``bytes`` or ``bytearray``.

        Raises:
            UnicodeDecodeError: If the byte string is not valid UTF-8.
            TypeError: For any other unsupported type (raised by the base
                class implementation).
        """
        if isinstance(o, (bytes, bytearray)):
            return o.decode("utf-8")
        # Defer to the base class so unsupported types still raise TypeError.
        return super().default(o)
class MySpiderPipeline(object):
    """Item pipeline that writes every item to a file, one JSON object per line.

    The output file is opened (and truncated) when the pipeline is
    constructed and closed in ``close_spider``.
    """

    def __init__(self, path="myInfo.json"):
        """Open the output file for writing.

        Args:
            path: Location of the output file. Defaults to ``myInfo.json``
                in the current working directory.
        """
        # NOTE: despite its name, ``filename`` holds the open file object.
        # The attribute name is kept so existing code that reads it still works.
        self.filename = open(path, "w", encoding="utf-8")

    def process_item(self, item, spider):
        """Serialize *item* as one JSON line and append it to the output file.

        Args:
            item: The scraped item; it must be convertible with ``dict(item)``.
            spider: The spider that produced the item (unused here).

        Returns:
            The unchanged *item*.
        """
        # ensure_ascii=False keeps non-ASCII text readable in the output file;
        # MyEncoder takes care of values the stock encoder cannot serialize.
        text = json.dumps(dict(item), ensure_ascii=False, cls=MyEncoder) + "\n"
        self.filename.write(text)
        return item

    def close_spider(self, spider):
        """Print a completion message and close the output file.

        Args:
            spider: The spider being closed (unused here).
        """
        try:
            print("数据处理完毕,谢谢使用!")
        finally:
            # Close even if printing fails (e.g. the console cannot encode
            # the message) so buffered data is flushed to disk.
            self.filename.close()