可以使用pip安装也可以手动下载安装。
使用pip安装,在命令行执行如下命令:
sudo pip install pymysql
连接数据库如下:
import pymysql.cursors

# Open a connection to the local MySQL server. DictCursor is selected as the
# cursor class, so each fetched row comes back as a dict.
connection = pymysql.connect(
    host='127.0.0.1',
    port=3306,
    user='root',
    password='zhyea.com',
    db='employees',
    charset='utf8mb4',
    cursorclass=pymysql.cursors.DictCursor,
)
也可以使用字典进行连接参数的管理,我觉得这样子更优雅一些:
import pymysql.cursors

# Keep every connection option in a single mapping.
config = dict(
    host='127.0.0.1',
    port=3306,
    user='root',
    password='zhyea.com',
    db='employees',
    charset='utf8mb4',
    cursorclass=pymysql.cursors.DictCursor,
)

# Unpack the mapping into keyword arguments to open the connection.
connection = pymysql.connect(**config)
插入数据:
执行sql语句前需要获取cursor,因为默认没有开启自动提交,故在执行sql语句后需要主动commit,最后不要忘记关闭连接:
from datetime import date, datetime, timedelta

import pymysql.cursors

# Connection settings
config = {
    'host': '127.0.0.1',
    'port': 3306,
    'user': 'root',
    'password': 'zhyea.com',
    'db': 'employees',
    'charset': 'utf8mb4',
    'cursorclass': pymysql.cursors.DictCursor,
}

# Open the connection
connection = pymysql.connect(**config)

# Tomorrow's date, used as the hire date of the inserted record
tomorrow = datetime.now().date() + timedelta(days=1)

try:
    with connection.cursor() as cursor:
        # Insert one record; the values are passed separately so the driver
        # escapes them when filling in the %s placeholders
        sql = 'INSERT INTO employees (first_name, last_name, hire_date, gender, birth_date) VALUES (%s, %s, %s, %s, %s)'
        cursor.execute(sql, ('Robin', 'Zhyea', tomorrow, 'M', date(1989, 6, 14)))
    # Autocommit is not enabled, so the INSERT has to be committed explicitly
    connection.commit()
finally:
    # Release the connection even when the INSERT raised
    connection.close()
执行查询:
import datetime

import pymysql.cursors

# Connection settings
config = {
    'host': '127.0.0.1',
    'port': 3306,
    'user': 'root',
    'password': 'zhyea.com',
    'db': 'employees',
    'charset': 'utf8mb4',
    'cursorclass': pymysql.cursors.DictCursor,
}

# Open the connection
connection = pymysql.connect(**config)

# Hire-date range to search
hire_start = datetime.date(1999, 1, 1)
hire_end = datetime.date(2016, 12, 31)

try:
    with connection.cursor() as cursor:
        # Run the query; the date bounds are bound to the %s placeholders
        sql = 'SELECT first_name, last_name, hire_date FROM employees WHERE hire_date BETWEEN %s AND %s'
        cursor.execute(sql, (hire_start, hire_end))
        # Fetch the first matching row
        result = cursor.fetchone()
        print(result)
    # No commit here: a SELECT changes nothing that would need to be saved
finally:
    # Release the connection even when the query raised
    connection.close()
从结果集中获取指定数目的记录,可以使用fetchmany方法:
# Fetch up to two rows from the result set of the query executed on `cursor`
result = cursor.fetchmany(2)
不过不建议这样使用,最好在sql语句中用LIMIT限制返回的记录数。
获取全部结果集可以使用fetchall方法:
# Fetch the whole result set of the query executed on `cursor`
result = cursor.fetchall()
[{'last_name': 'Vanderkelen', 'hire_date': datetime.date(2015, 8, 12), 'first_name': 'Geert'}, {'last_name':'Zhyea', 'hire_date': datetime.date(2015, 8, 21), 'first_name': 'Robin'}]
下面是在Django中的使用:
在django中使用PyMySQL是我寻找它的最初目的。目前同时支持python3.4、django1.8的数据库backend并不好找,PyMySQL是我目前找到的最好用的一个。
设置DATABASES和官方推荐使用的MySQLdb的设置没什么区别:
# Django database settings: a single "default" connection that uses
# Django's stock MySQL backend.
DATABASES = {
    'default': {
        'ENGINE': 'django.db.backends.mysql',
        'NAME': 'mytest',
        'USER': 'root',
        'PASSWORD': 'zhyea.com',
        'HOST': '127.0.0.1',
        'PORT': '3306',
    },
}
关键是这里:我们还需要在站点的__init__.py文件中添加如下的内容:
# Register PyMySQL under the MySQLdb name, so that code importing MySQLdb
# (presumably Django's MySQL backend here) ends up using PyMySQL.
import pymysql
pymysql.install_as_MySQLdb()
本人的项目中sql语句是自己手写的,数据库中的表结构也已事先创建好,所以只需要连接数据库,然后对数据库进行增删改查就行了。下面是项目中的一些代码:
# coding: utf-8
# author: jeff
# create: 2017年12月22日 18:05
# import json
import datetime
import requests
import time
from scrapy.utils.project import get_project_settings
import pymysql
# Scrapy project settings; the DB_* and WEBSITE entries are read from it below.
settings = get_project_settings()
# Timestamp layout used both to format the current time and to parse
# notice times elsewhere in this module.
ISOTIMEFORMAT = '%Y-%m-%d %X'


def nowTime():
    """Return the current local time as a string formatted with ISOTIMEFORMAT."""
    # With no time tuple given, time.strftime() formats the current local
    # time, so no explicit time.localtime(time.time()) is needed.
    return time.strftime(ISOTIMEFORMAT)
# class Setting():
# def __init__(self, json_file):
# self.settings = {}
# with open(json_file, mode='r', encoding='utf-8') as f:
# self.settings = json.loads(f.read())
# def save(self, json_file):
# with open(json_file, mode='w', encoding='utf-8') as f:
# json.dump(self.settings, f)
class Message():
    """A sent message, stored in the table named by settings['DB_MSG_TABLE']."""

    def __init__(self):
        self.sender = ''
        self.recipient = ''
        self.url_profile = ''
        self.send_time = datetime.datetime.now()
        self.message = ''
        self.tag = ''

    def connectDB(self):
        """Open and return a new pymysql connection.

        Host, user, password and database come from the DB_* project
        settings; the port is fixed at 3306 and rows are returned as dicts
        (DictCursor).
        """
        conn = pymysql.connect(host=settings['DB_HOST'],
                               port=3306,
                               user=settings['DB_USER'],
                               passwd=settings['DB_PASSWD'],
                               db=settings['DB_DB'],
                               charset='utf8',
                               cursorclass=pymysql.cursors.DictCursor)
        return conn

    def save(self):
        """Insert this message as one row.

        The send_time column receives the time of the call, not
        self.send_time, and channel is always 'linkedin_search_send'.

        Returns:
            The affected-row count reported by cursor.execute().
        """
        # A table name cannot be bound as a query parameter, so it is the
        # only thing formatted into the statement; every value is bound so
        # the driver escapes it (message and tag were previously inserted raw).
        sql = """ INSERT INTO {DB_TABLE}
                  (sender, recipient, urlProfile, send_time, message, channel, tag)
                  VALUES (%s, %s, %s, %s, %s, %s, %s)
              """.format(DB_TABLE=settings['DB_MSG_TABLE'])
        params = (
            self.sender,
            self.recipient,
            self.url_profile,
            datetime.datetime.now(),
            self.message,
            'linkedin_search_send',
            self.tag,
        )
        conn = self.connectDB()
        try:
            with conn.cursor() as cursor:
                result = cursor.execute(sql, params)
            conn.commit()
        finally:
            # Close the connection even when the INSERT raised
            conn.close()
        print(result)
        return result

    def check(self, interval):
        """Compare self.send_time with the latest stored message.

        Looks up the most recent row with this url_profile and sender.

        Args:
            interval: number of hours (anything int() accepts).

        Returns:
            True when no such row exists, or when self.send_time is more
            than `interval` hours after that row's send_time; False otherwise.
        """
        sql = """ SELECT send_time
                  FROM {DB_TABLE}
                  WHERE urlProfile=%s AND sender=%s
                  ORDER BY send_time DESC
                  LIMIT 1
              """.format(DB_TABLE=settings['DB_MSG_TABLE'])
        conn = self.connectDB()
        try:
            with conn.cursor() as cursor:
                cursor.execute(sql, (self.url_profile, self.sender))
                # Only the newest row is needed
                latest = cursor.fetchone()
        finally:
            conn.close()
        if not latest:
            return True
        hours_since_last = (self.send_time - latest['send_time']).total_seconds() / 3600
        return hours_since_last > int(interval)
class Task():
    """Crawler task parameters plus helpers that talk to the task status endpoint."""

    # Seconds to wait for the status endpoint. Without a timeout a
    # requests call can block indefinitely if the server never answers.
    REQUEST_TIMEOUT = 10

    def __init__(self, kwargs):  # kwargs: dict of parameters passed in when the spider is initialised
        """Copy the task parameters out of `kwargs`.

        Args:
            kwargs: mapping that must contain every key read below; the
                counters and page numbers are converted with int().

        Raises:
            KeyError: if a required key is missing.
        """
        self.city = kwargs['city']
        self.count_succ = int(kwargs['count_succ'])
        self.cur_page = int(kwargs['cur_page'])
        self.exclude = kwargs['exclude']
        self.ft_distance = kwargs['ft_distance']
        self.is_continue = kwargs['is_continue']
        self.keyword = kwargs['keyword']
        self.max_num = int(kwargs['max_num'])
        self.max_page = int(kwargs['max_page'])
        self.msg = kwargs['msg']
        self.interval = kwargs['interval']
        self.passwd = kwargs['passwd']
        self.start_page = int(kwargs['start_page'])
        self.task_id = kwargs['task_id']
        self.user = kwargs['user']
        self.status = ''
        self.task_error = 'normal'
        # is_continue == "0" restarts from start_page instead of cur_page
        if self.is_continue == "0":
            self.cur_page = self.start_page

    def _status_url(self):
        """Return the status endpoint URL for the current task_id."""
        return settings['WEBSITE'] + "task/" + str(self.task_id) + "/status"

    def check_status(self):
        """Fetch the task status and store it in self.status.

        Returns:
            False when the reported status is 'shutting_down', True otherwise.
        """
        ret = requests.get(self._status_url(), timeout=self.REQUEST_TIMEOUT)
        self.status = ret.json()['status']
        return self.status != 'shutting_down'

    def set_status(self, status):
        """POST `status` together with self.task_error and print the response body."""
        payload = {"status": status, "task_error": self.task_error}
        ret = requests.post(self._status_url(), data=payload, timeout=self.REQUEST_TIMEOUT)
        print(ret.text)

    def save(self):
        """POST the current count_succ and cur_page and print the response body."""
        payload = {"count_succ": self.count_succ, "cur_page": self.cur_page}
        print(payload)
        ret = requests.post(self._status_url(), data=payload, timeout=self.REQUEST_TIMEOUT)
        print(ret.text)
class Person():
    """A person record, stored in the table named by settings['DB_PERSON_TABLE']."""

    def __init__(self):
        self.name = ''
        self.position = ''
        self.company = ''
        self.industry = ''
        self.location = ''
        self.city = ''
        self.cell_phone = ''
        self.email = ''
        self.job_years = ''
        self.skill_english = ''
        self.edu_major = ''
        self.edu_degree = ''
        self.edu_graduate = ''
        self.career_path = ''
        self.career_location = ''
        self.urlprofile = ''
        self.headline = ''
        self.wechatqr = ''
        self.wechat = ''
        self.im_qq = ''
        self.is_connected = ''
        self.js_last_onboard = ''
        self.source = ''
        self.job_foreign = ''
        self.edu_211 = ''
        self.edu_qs500 = ''
        self.edu_oversea = ''
        self.oversea_exp = ''
        self.create_time = ''
        self.sourcer = ''
        self.sourcer_id = ''
        self.qq = ''
        self.im = ''
        self.job_status = ''
        self.src_file = ''
        self.result = ''
        self.edu_school = ''
        self.source_id = ''

    def connectDB(self):
        """Open and return a new pymysql connection.

        Host, user, password and database come from the DB_* project
        settings; the port is fixed at 3306 and rows are returned as dicts
        (DictCursor).
        """
        conn = pymysql.connect(host=settings['DB_HOST'],
                               port=3306,
                               user=settings['DB_USER'],
                               passwd=settings['DB_PASSWD'],
                               db=settings['DB_DB'],
                               charset='utf8',
                               cursorclass=pymysql.cursors.DictCursor)
        return conn

    def save(self):
        """Insert this person as one row and print the executed statement.

        Returns:
            The affected-row count reported by cursor.execute().
        """
        # (column, value) pairs in insertion order. modify_time is filled
        # with create_time.
        # NOTE(review): dept is filled from self.industry, the same value as
        # the industry column — confirm this is intended.
        row = [
            ('name', self.name),
            ('headline', self.headline),
            ('CellPhone', self.cell_phone),
            ('email', self.email),
            ('wechatQR', self.wechatqr),
            ('urlProfile', self.urlprofile),
            ('isConnected', self.is_connected),
            ('result', self.result),
            ('location', self.location),
            ('corp', self.company),
            ('school', self.edu_school),
            ('industry', self.industry),
            ('position', self.position),
            ('dept', self.industry),
            ('job_status', self.job_status),
            ('source', self.source),
            ('source_id', self.source_id),
            ('sourcer', self.sourcer),
            ('src_file', self.src_file),
            ('create_time', self.create_time),
            ('modify_time', self.create_time),
        ]
        # Identifiers cannot be bound as parameters, so the table and column
        # names are formatted in; every value is bound so the driver escapes
        # it (a quote inside a name or headline previously broke the query).
        sql = "INSERT INTO {DB_TABLE} ({columns}) VALUES ({placeholders})".format(
            DB_TABLE=settings['DB_PERSON_TABLE'],
            columns=', '.join(column for column, _ in row),
            placeholders=', '.join(['%s'] * len(row)),
        )
        params = [value for _, value in row]
        conn = self.connectDB()
        try:
            with conn.cursor() as cursor:
                # mogrify() renders the statement exactly as it will be sent
                print(cursor.mogrify(sql, params))
                result = cursor.execute(sql, params)
            conn.commit()
        finally:
            # Close the connection even when the INSERT raised
            conn.close()
        return result

    def check(self, source_id):
        """Report whether a row with this urlprofile already exists.

        The original note on this method reads: two cases, new_friends and
        exist_friends.

        Args:
            source_id: currently unused; kept so existing callers keep working.

        Returns:
            False when self.urlprofile is empty (no query is made) or no row
            matches; True when a matching row is found.
        """
        if not self.urlprofile:
            return False
        sql = "SELECT * FROM {DB_TABLE} WHERE urlProfile = %s".format(
            DB_TABLE=settings['DB_PERSON_TABLE'])
        params = (self.urlprofile,)
        conn = self.connectDB()
        try:
            with conn.cursor() as cursor:
                print(cursor.mogrify(sql, params))
                cursor.execute(sql, params)
                result = cursor.fetchone()
        finally:
            # The connection used to be left open on every call
            conn.close()
        return bool(result)
class JsNotify():
    """A job-change notice, stored in the table named by settings['DB_JS_NOTIFY_TABLE']."""

    def __init__(self):
        self.name = ''
        self.urlProfile = ''
        self.notice_msg = ''
        self.cur_position = ''
        self.cur_company = ''
        self.onboard_time = ''
        self.notice_time = ''
        self.js_type = ''  # expected values: resign (changed company), promotion, rotation (changed post)
        self.last_company = ''
        self.last_position = ''
        self.status = ''  # expected values: saved (crawled, not yet processed), imported (processed), drop (discarded)
        self.src_id = ''

    def connectDB(self):
        """Open and return a new pymysql connection.

        Host, user, password and database come from the DB_* project
        settings; the port is fixed at 3306 and rows are returned as dicts
        (DictCursor).
        """
        conn = pymysql.connect(host=settings['DB_HOST'],
                               port=3306,
                               user=settings['DB_USER'],
                               passwd=settings['DB_PASSWD'],
                               db=settings['DB_DB'],
                               charset='utf8',
                               cursorclass=pymysql.cursors.DictCursor)
        return conn

    def save(self):
        """Insert this notice as one row and print the executed statement.

        create_time and modify_time are both set to the current time, and
        the status column is always written as 'saved' regardless of
        self.status.

        Returns:
            The affected-row count reported by cursor.execute().
        """
        # One timestamp for both columns, so they cannot differ by a tick
        timestamp = nowTime()
        row = [
            ('name', self.name),
            ('urlProfile', self.urlProfile),
            ('notice_msg', self.notice_msg),
            ('notice_time', self.notice_time),
            ('cur_position', self.cur_position),
            ('cur_company', self.cur_company),
            ('onboard_time', self.onboard_time),
            ('js_type', self.js_type),
            ('last_company', self.last_company),
            ('last_position', self.last_position),
            ('create_time', timestamp),
            ('modify_time', timestamp),
            ('status', 'saved'),
            ('src_id', self.src_id),
        ]
        # Identifiers cannot be bound as parameters, so the table and column
        # names are formatted in; every value is bound so the driver escapes
        # it (a quote inside notice_msg previously broke the query).
        sql = "INSERT INTO {DB_TABLE} ({columns}) VALUES ({placeholders})".format(
            DB_TABLE=settings['DB_JS_NOTIFY_TABLE'],
            columns=', '.join(column for column, _ in row),
            placeholders=', '.join(['%s'] * len(row)),
        )
        params = [value for _, value in row]
        conn = self.connectDB()
        try:
            with conn.cursor() as cursor:
                # mogrify() renders the statement exactly as it will be sent
                print(cursor.mogrify(sql, params))
                ret = cursor.execute(sql, params)
            conn.commit()
        finally:
            # Close the connection even when the INSERT raised
            conn.close()
        return ret

    def check(self, interval):
        """Decide what to do with this notice.

        Args:
            interval: number of hours (anything int() accepts).

        Returns:
            "stop" when self.notice_time is more than `interval` hours old
            (no query is made), or when a matching stored row has the same
            src_id; "skip" when a matching row has a different src_id;
            "save" when no stored row matches.

        Raises:
            ValueError: if self.notice_time does not match ISOTIMEFORMAT.
        """
        # Age check first: it needs no database access
        now_time = datetime.datetime.now()
        timepoint = now_time - datetime.timedelta(hours=int(interval))
        notice_time = datetime.datetime.strptime(self.notice_time, ISOTIMEFORMAT)
        if notice_time < timepoint:
            return "stop"
        # Duplicate check against the stored notices
        sql = """
            SELECT *
            FROM {DB_TABLE}
            WHERE name=%s AND urlProfile=%s AND notice_msg=%s AND notice_time=%s
        """.format(DB_TABLE=settings['DB_JS_NOTIFY_TABLE'])
        params = (self.name, self.urlProfile, self.notice_msg, self.notice_time)
        conn = self.connectDB()
        try:
            with conn.cursor() as cursor:
                print(cursor.mogrify(sql, params))
                cursor.execute(sql, params)
                # Only the first matching row is inspected
                first_match = cursor.fetchone()
        finally:
            conn.close()
        if not first_match:
            return "save"
        if first_match['src_id'] == self.src_id:
            return "stop"
        return "skip"
if __name__ == '__main__':
    # Manual smoke test: ask the status endpoint about task 27.
    # Task.__init__ requires a dict containing every task parameter, so
    # calling Task() with no argument raised TypeError; placeholder values
    # are supplied for everything except task_id.
    demo_kwargs = {
        'city': '',
        'count_succ': 0,
        'cur_page': 1,
        'exclude': '',
        'ft_distance': '',
        'is_continue': '1',
        'keyword': '',
        'max_num': 0,
        'max_page': 1,
        'msg': '',
        'interval': 0,
        'passwd': '',
        'start_page': 1,
        'task_id': 27,
        'user': '',
    }
    task = Task(demo_kwargs)
    print(task.check_status())