关系型数据库基于关系模型,而关系模型通过二维表存储,关系型数据库有:SQLite、MySQL、Oracle、SQL Server、DB2等。
MySQL数据库存储
1.连接数据库
import pymysql
#用connect()方法申明一个mysql连接对象db
db = pymysql.connect(host='localhost',user='root',password='rr123456',port=3306)
#获得mysql操作游标,利用游标来执行sql语句
cursor = db.cursor()
#新建数据库,并调用execute()方法执行
cursor.execute('CREATE DATABASE spider DEFAULT CHARACTER SET utf8')
#关闭数据库
db.close()
2.创建表
import pymysql
db = pymysql.connect(host='localhost',user='root',password='rr123456',port=3306,db='spider')
cursor = db.cursor()
#创建表
sql = 'CREATE TABLE IF NOT EXISTS user (id VARCHAR(255) NOT NULL,name VARCHAR(255) NOT NULL,age INT NOT NULL,PRIMARY KEY (id))'
cursor.execute(sql)
db.close()
3.插入数据
import pymysql
db = pymysql.connect(host='localhost',user='root',password='rr123456',port=3306,db='spider')
cursor = db.cursor()
sql = 'INSERT INTO user(id,name,age) values("001","xxx",18)'
try:
cursor.execute(sql)
db.commit() #真正将语句提交到数据执行的方法,对数据库插入、更新、删除都需调用
except:
db.rollback()
db.close()
动态插入:
import pymysql
db = pymysql.connect(host='localhost',user='root',password='rr123456',port=3306,db='spider')
cursor = db.cursor()
data = {
"id":"002",
"name":'ss',
"age":20
}
table = 'user'
keys = ','.join(data.keys())
values = ','.join(['%s']*len(data))
sql = 'INSERT INTO {table}({keys}) values({values})'.format(table=table,keys=keys,values=values)
try:
if cursor.execute(sql,tuple(data.values())):
print('成功')
db.commit()
except:
print('失败')
db.rollback()
db.close()
4.更新数据
import pymysql
db = pymysql.connect(host='localhost',user='root',password='rr123456',port=3306,db='spider')
cursor = db.cursor()
sql = 'UPDATE user SET age=25 WHERE id="001"'
try:
cursor.execute(sql)
db.commit()
except:
db.rollback()
db.close()
动态更新并去重,如果数据存在,则更新,如果数据不存在则插入:
import pymysql
db = pymysql.connect(host='localhost',user='root',password='rr123456',port=3306,db='spider')
cursor = db.cursor()
data = {
"id":"002",
"name":'ss',
"age":40
}
table = 'user'
keys = ','.join(data.keys())
values = ','.join(['%s']*len(data))
sql = 'INSERT INTO {table}({keys}) values({values}) ON DUPLICATE KEY UPDATE'.format(table=table,keys=keys,values=values)
update = ','.join([" {key}=%s".format(key=key) for key in data])
sql += update
try:
if cursor.execute(sql,tuple(data.values())*2):
print('成功')
db.commit()
except:
print('失败')
db.rollback()
db.close()
5.删除
import pymysql
db = pymysql.connect(host='localhost',user='root',password='rr123456',port=3306,db='spider')
cursor = db.cursor()
table = 'user'
cond = 'age > 30'
sql = 'DELETE FROM {table} WHERE {cond}'.format(table=table,cond=cond)
try:
cursor.execute(sql)
db.commit()
except:
db.rollback()
db.close()
6.查询
import pymysql
db = pymysql.connect(host='localhost',user='root',password='rr123456',port=3306,db='spider')
cursor = db.cursor()
table = 'user'
cond = 'age > 20'
sql = 'SELECT * FROM {table} WHERE {cond}'.format(table=table,cond=cond)
try:
cursor.execute(sql)
print(cursor.rowcount) #获取查询条数
print(cursor.fetchone()) #查询一条,以元组形式展示结果
print(cursor.fetchall()) #查询所有,不含上面已查询的,fetchall()内部实现偏移指针用来指向查询结果,获取一条后,指针指向下一条,以二元组形式全部展示结果。若数量大开销高,可用while加fetchone()代替,循环一次,指针偏移一次
except:
db.rollback()
db.close()
7.案例
#练习:爬取豆瓣电影TOP250,电影名称,评分,推荐语信息并保存到MySql中
from pyquery import PyQuery as pq
import requests
import pymysql
url = 'https://movie.douban.com/top250'
headers = {
'User-Agent': 'Mozilla/5.0(Windows NT 6.1;Win64;x64)AppleWebKit/537.36(KHTML,like Gecko)Chrome/79.0.3945.88 Safari/537.36'
}
res = requests.get(url=url,headers=headers)
doc = pq(res.text)
items = doc('.info').items()
list_all = []
for item in items:
movies_dict = {}
name = pq(item.find('.hd').html()).find('span:first-child').text() #此处的find找到所有符合条件的
score = pq(item.find('.star').html()).find('span:nth-child(2)').text()
comment = item.find('.quote').text()
movies_dict['name'] = name
movies_dict['score'] = score
movies_dict['comment'] = comment
list_all.append(movies_dict)
db = pymysql.connect(host='localhost',user='root',password='rr123456',port=3306,db='spider')
cursor = db.cursor()
sql = 'CREATE TABLE movies(name VARCHAR(255) NOT NULL,score FLOAT(6,1),comment VARCHAR(255) NOT NULL)'
cursor.execute(sql)
for movie in list_all:
keys = ','.join(movie.keys())
values = ','.join(['%s']*len(movie))
sql = 'INSERT INTO movies({keys}) values({values})'.format(keys=keys,values=values)
try:
if cursor.execute(sql,tuple(movie.values())):
print('成功')
db.commit()
except:
print('失败')
db.rollback()
db.close()