数据存储

前面已经实现了数据的下载和解析,接下来,我们需要把数据存储到数据库中,将来写接口的时候,可以直接读取我们自己的数据库获取数据,顺便练习一下python的数据库操作。

准备工作

  1. 数据库,这里我们选择使用mysql
  2. 数据库管理工具,我们选择免费的DBeaver
  3. PyMySQL

数组处理

我们精简一下数据,修改后的实体类如下

class VideoBean:
    """
    视频数据
    """
    def __init__(self, data):
        self.id = data["id"]
        self.title = data["title"]
        self.poster = data["poster"]
        self.source_name = data["source_name"]
        self.play_url = data["play_url"]
        self.playcnt = data["playcnt"]
        self.mthid = data["mthid"]
        self.mthpic = data["mthpic"]
        self.threadId = data["threadId"]
        self.duration = data["duration"]
        self.comment_id = data["comment_id"]
        self.publish_time = data["publish_time"]
        self.new_cate_v2 = data["new_cate_v2"]
        self.like = data["like"]
        self.fmlike = data["fmlike"]
        self.comment = data["comment"]
        self.fmcomment = data["fmcomment"]
        self.fmplaycnt = data["fmplaycnt"]
        self.fmplaycnt_2 = data["fmplaycnt_2"]
        self.outstand_tag = data["outstand_tag"]

    def __repr__(self):
        return "<Video>[%s]" % self.id

数据获取

数据获取部分,我们依旧使用之前数据下载章节的代码即可

存储

需要先创建一个数据库

CREATE DATABASE `video` DEFAULT CHARSET utf8mb4 COLLATE utf8mb4_general_ci;

获取数据库连接

db = pymysql.connect(host='192.168.3.30', port=3306, user='root', password='123456', database="video", charset="utf8")
cursor = db.cursor()

创建表

video_sql = """
CREATE TABLE IF NOT EXISTS video(_id INTEGER PRIMARY KEY AUTO_INCREMENT,id VARCHAR(30) NOT NULL,title VARCHAR(300) NOT 
NULL,poster VARCHAR(300) NOT NULL,source_name VARCHAR(300) NOT NULL,play_url VARCHAR(300) NOT NULL,playcnt VARCHAR(40) NOT 
NULL,mthid VARCHAR(30) NOT NULL,mthpic VARCHAR(300) NOT NULL,threadId VARCHAR(30) NOT NULL,duration VARCHAR(30) NOT NULL,
comment_id VARCHAR(30) NOT NULL,publish_time VARCHAR(30) NOT NULL,new_cate_v2 VARCHAR(30) NOT NULL,`like` VARCHAR (40) 
NOT NULL,fmlike VARCHAR(30) NOT NULL,comment VARCHAR(30) NOT NULL,fmcomment VARCHAR(30) NOT NULL,fmplaycnt VARCHAR(30) 
NOT NULL,fmplaycnt_2 VARCHAR(30) NOT NULL,outstand_tag VARCHAR(30) NOT NULL)"""
cursor.execute(video_sql)

抓取数据并存储

我们希望抓取更多分类的数据,所以会准备一个视频分类的list,之后遍历去查询获取。

video_categories = ["recommend", "yingshi", "yinyue", "vlog", "youxi", "gaoxiao", "zongyi", "yule", "dongman",
                    "shenghuo", "guangchangwu", "meishi", "chongwu", "sannong", "junshi", "shehui", "tiyu", "keji",
                    "shishang", "qiche", "qinzi", "jiaoyu", "wenhua", "lvyou", "miaodong"]
for i in range(10):
    for item in video_categories:
        videos = GetVideoData.get_video_data(item)
        save(videos, cursor, db)
        print("{} 已下载".format(item))
        time.sleep(1)

完整的代码

import pymysql
import GetVideoData
import time


def save(videos, cursor, db):
    if videos is None:
        print("videos 为null")
        return
    if len(videos) == 0:
        print("videos 长度为0")
        return
    if cursor is None:
        print("cursor 为null")
        return
    if db is None:
        print("db 为null")
        return
    insert_sql = """INSERT INTO video(`id`,`title`,`poster`,`source_name`,`play_url`,
  `playcnt`,`mthid`,`mthpic`,`threadId`,`duration`,`comment_id`,`publish_time`,`new_cate_v2`,
  `like`,`fmlike`,`comment`,`fmcomment`,`fmplaycnt`,`fmplaycnt_2`,`outstand_tag`) values ( '{}', '{}', '{}', '{}', '{}',
   {}, '{}', '{}', '{}', '{}','{}','{}','{}',{}, '{}', '{}', '{}', '{}', '{}', '{}')
  """
    for video in videos:
        # 先查询是否存在,如果不存在就插入,存在就跳过
        cursor.execute("select count(*) from video where `id` = {};".format(video.id))
        res = cursor.fetchone()
        if res[0] > 0:
            print(video)
            continue
        cursor.execute(
            insert_sql.format(video.id, video.title, video.poster, video.source_name, video.play_url, video.playcnt,
                              video.mthid, video.mthpic, video.threadId, video.duration,
                              video.comment_id, video.publish_time, video.new_cate_v2,
                              video.like,
                              video.fmlike, video.comment, video.fmcomment, video.fmplaycnt, video.fmplaycnt_2,
                              video.outstand_tag))
        db.commit()


db = pymysql.connect(host='192.168.3.30', port=3306, user='root', password='123456', database="video", charset="utf8")
cursor = db.cursor()
cursor.execute("SELECT VERSION()")
video_sql = """
CREATE TABLE IF NOT EXISTS video(_id INTEGER PRIMARY KEY AUTO_INCREMENT,id VARCHAR(30) NOT NULL,title VARCHAR(300) NOT 
NULL,poster VARCHAR(300) NOT NULL,source_name VARCHAR(300) NOT NULL,play_url VARCHAR(300) NOT NULL,playcnt VARCHAR(40) NOT 
NULL,mthid VARCHAR(30) NOT NULL,mthpic VARCHAR(300) NOT NULL,threadId VARCHAR(30) NOT NULL,duration VARCHAR(30) NOT NULL,
comment_id VARCHAR(30) NOT NULL,publish_time VARCHAR(30) NOT NULL,new_cate_v2 VARCHAR(30) NOT NULL,`like` VARCHAR (40) 
NOT NULL,fmlike VARCHAR(30) NOT NULL,comment VARCHAR(30) NOT NULL,fmcomment VARCHAR(30) NOT NULL,fmplaycnt VARCHAR(30) 
NOT NULL,fmplaycnt_2 VARCHAR(30) NOT NULL,outstand_tag VARCHAR(30) NOT NULL)
"""
# 创建数据库
cursor.execute(video_sql)

# 抓取并存入数据
video_categories = ["recommend", "yingshi", "yinyue", "vlog", "youxi", "gaoxiao", "zongyi", "yule", "dongman",
                    "shenghuo", "guangchangwu", "meishi", "chongwu", "sannong", "junshi", "shehui", "tiyu", "keji",
                    "shishang", "qiche", "qinzi", "jiaoyu", "wenhua", "lvyou", "miaodong"]
for i in range(10):
    for item in video_categories:
        videos = GetVideoData.get_video_data(item)
        save(videos, cursor, db)
        print("{} 已下载".format(item))
        time.sleep(1)

db.close()

总结

这一节,完成了数据的抓取并存储到数据库

下一节,我们整理一下当前的代码,并处理一下视频分类数据的处理。