01_resize.py:修改图片尺寸为600*800或者800*600并保存到small文件夹中,若有错误会生成“第一步LOG日志.txt”文件。
#!/usr/bin/env python
# coding=utf-8
# 延伸阅读:https://www.cdsy.xyz/computer/programme/Python/241210/cd64921.html
# 修改图片尺寸为600*800或者800*600并保存到small文件夹中
import os
import cv2
import numpy as np
# 从里面一行行的读取文件夹名字
class Resize():
    """Shrink the JPEG images of every folder listed in ``file_list.txt``.

    Each folder ``./images/<name>`` that exists is processed: every ``.jpg``
    inside it is resized to 600x800 (portrait) or 800x600 (landscape) and
    written to ``./images/small/<name>/`` under the same file name.
    Failures are collected in ``<name>第一步LOG日志.txt`` inside that output
    folder; the log is removed when a run finishes without errors.
    """

    def __init__(self):
        """Read the folder names and process each folder found under ``./images``."""
        self.file_list = "file_list.txt"
        with open(self.file_list, "r") as f:
            # Folder names are separated by any whitespace (one per line works).
            file_name_list = f.read().split()
        for name in file_name_list:
            img_dir = "./images/" + name
            if os.path.exists(img_dir):
                self.main(name, img_dir)

    def main(self, name, img_dir):
        """Resize all ``.jpg`` files of one folder.

        Args:
            name: Folder name as listed in ``file_list.txt``; used in the
                progress messages and in the log file name.
            img_dir: Path of the source folder, written with ``/`` separators
                (the last path component names the output sub-folder).
        """
        error_list = []
        file_name = img_dir.rsplit('/', 1)[1].strip()
        small_full_dir = os.path.join("./images/small", file_name)
        if not os.path.exists(small_full_dir):
            os.makedirs(small_full_dir)
        # Process the images in ascending name order.
        file_names = sorted(f for f in os.listdir(img_dir) if f.lower().endswith('.jpg'))
        print('%s文件夹中一共%s张图片需要处理' % (name, len(file_names)))
        # A separate loop variable keeps ``name`` (the folder) intact, so the
        # log file below is named after the folder and not after the last image.
        for num, img_name in enumerate(file_names, start=1):
            img_path = os.path.join(img_dir, img_name)
            img_small_path = os.path.join(small_full_dir, img_name)
            if self._resize_one(img_path, img_small_path):
                print("第%s张处理成功" % num)
                print('%s resized copied to %s' % (img_name, img_small_path))
            else:
                message = "第%s张图片%s处理失败" % (num, img_name)
                print(message)
                error_list.append(message)
        # Write the error log, or drop a stale one left by an earlier run.
        log_file = small_full_dir + "/" + name + "第一步LOG日志.txt"
        if error_list:
            with open(log_file, "w") as f:
                f.writelines(line + "\n" for line in error_list)
        elif os.path.exists(log_file):
            os.remove(log_file)
        print('Resizing complete!' + "\n")

    @staticmethod
    def _resize_one(img_path, img_small_path):
        """Resize one image file; return True on success, False on failure."""
        try:
            # np.fromfile + imdecode (rather than cv2.imread) so that paths
            # containing non-ASCII characters can be read.
            img = cv2.imdecode(np.fromfile(img_path, dtype=np.uint8), -1)
            if img is None:
                # imdecode signals an undecodable buffer by returning None.
                return False
            # img.shape is (height, width[, channels]); portrait -> 600x800.
            dim = (600, 800) if img.shape[0] > img.shape[1] else (800, 600)
            img_small = cv2.resize(img, dim)
            # imencode reports success through its first return value;
            # ndarray.tofile() always returns None and cannot be used as status.
            ok, encoded = cv2.imencode('.jpg', img_small)
            if not ok:
                return False
            encoded.tofile(img_small_path)
        except (OSError, cv2.error):
            return False
        return True
if __name__ == "__main__":
    # Script entry point: run the resize step and report any failure as a
    # one-line message instead of a traceback.
    try:
        resize = Resize()
    except Exception as err:
        print("出错了:", err)
02_create_xlsx_with_file_names.py:将small文件夹中处理尺寸后的图片名字全部写入到表格中,根据生成的表格,让用户将B列填写完整(无用的图用x/xx/xxx均可,不区分大小写,用户只需要填入有地编号牌子的那一张图即可)。
#!/usr/bin/env python
# coding=utf-8
# 将small文件夹中处理尺寸后的图片名字全部写入到表格中
import os
from openpyxl import Workbook, load_workbook
class Create_xlsx():
    """Write the resized image names of each folder into a workbook.

    For every folder listed in ``file_list.txt`` that exists under
    ``./images``, the ``.jpg`` names found in ``./images/small/<name>`` are
    written (sorted) into column A of ``./images/small/<name>/<name>.xlsx``.
    """

    def __init__(self):
        """Read the folder names and build one workbook per existing folder."""
        self.file_list = "file_list.txt"
        with open(self.file_list, "r") as handle:
            folder_names = handle.read().split()
        for name in folder_names:
            if not os.path.exists("./images/" + name):
                continue
            self.main(name)

    def main(self, name):
        """Create ``<name>.xlsx`` listing the ``.jpg`` files of one folder.

        Args:
            name: Folder name; selects ``./images/small/<name>`` and names the
                workbook saved inside it.
        """
        img_full_dir = os.path.join("./images/small", name)
        jpg_names = [entry for entry in os.listdir(img_full_dir)
                     if entry.lower().endswith('.jpg')]
        jpg_names.sort()
        # One image name per row in the first column.
        wb = Workbook()
        ws = wb.active
        for row, jpg_name in enumerate(jpg_names, start=1):
            ws.cell(row=row, column=1).value = jpg_name
        # Make column A wide enough for the file names.
        ws.column_dimensions['A'].width = 26
        wb.save(os.path.join(img_full_dir, name + ".xlsx"))
        wb.close()
        print("表格已生成,%s共处理%s张图片" % (name, len(jpg_names)))
if __name__ == "__main__":
    # Script entry point: build the workbooks and report any failure as a
    # one-line message instead of a traceback.
    try:
        resize = Create_xlsx()
    except Exception as err:
        print("出错了:", err)
03_generate_new_names.py:根据填写完整后的表格生成D/E/F/G等列的内容,包含shadow值、图片修改后的名字、棚号、地编等,如果有重复的地编会变红色,若有错误会生成“第三步LOG日志.txt”文件。(第一列原始图片名字、第二列用户填入的地编号、第三列无用数据为了生成第五列需要、第四列匹配到的shadow值、第五列以shadow值命名的唯一新图片名字、第六列文件夹名字也就是棚号、第七列地编号)
#!/usr/bin/env python
# coding=utf-8
# 根据生成的表格,让用户将B列填写完整后生成D/E/F/G等列的内容,包含shadw值、图片修改后的名字、棚号、地编等
# 如果有重复的地编会变红色一定要先让用户修改,修改完毕后再重新执行此文件
import os
from openpyxl import load_workbook
from openpyxl.styles import Font
from openpyxl.styles import colors
class Generate_new_names():
    """Fill columns C-G of each per-folder workbook from the user's labels.

    For every folder listed in ``file_list.txt`` that exists under
    ``./images``, ``./images/small/<name>/<name>.xlsx`` is opened. Column B
    holds the plot number typed by the user on the row of the sign photo
    (``x``/``xx``/``xxx``, case-insensitive, marks a photo to skip). The
    (folder, plot number) pair is looked up in ``shadow_list.xlsx`` and the
    rows following a labelled row receive: D = shadow value, E = new image
    name, F = folder name, G = plot number. A plot number that repeats is
    shown in red. Problems are written to ``<name>第三步LOG日志.txt``.
    """

    # ARGB hex strings are passed to Font directly; the named constants
    # ``colors.RED`` / ``colors.BLACK`` are not available in every openpyxl
    # release.
    _RED = "00FF0000"
    _BLACK = "00000000"

    def __init__(self):
        """Read the folder names and process each folder found under ``./images``."""
        self.file_list = "file_list.txt"
        with open(self.file_list, "r") as f:
            file_name_list = f.read().split()
        for name in file_name_list:
            img_dir = "./images/" + name
            if os.path.exists(img_dir):
                self.main(name)

    @staticmethod
    def _load_shadow_dict(shadow_list_excel):
        """Return ``{(str(col B), str(col C)): col D}`` read from the lookup workbook."""
        wb0 = load_workbook(shadow_list_excel)
        ws0 = wb0.active
        shadow_dict = dict()
        # Only columns B-D are needed, so the sheet may have any column count.
        for c2, c3, c4 in ws0.iter_rows(min_col=2, max_col=4):
            if c2.value is None and c3.value is None:
                # Blank row: nothing to index.
                continue
            key = (str(c2.value), str(c3.value))
            # Check with the same stringified key that is stored, otherwise
            # numeric cells are never recognised as duplicates.
            if key in shadow_dict:
                print('ERROR:重复! shadow_list.xlsx表格中 (%s,%s)出现了多次!'%(c2.value, c3.value))
            shadow_dict[key] = c4.value
        wb0.close()
        return shadow_dict

    def main(self, name):
        """Generate columns C-G of the workbook belonging to one folder.

        Args:
            name: Folder name; selects ``./images/small/<name>/<name>.xlsx``
                and is the first half of the shadow lookup key.
        """
        file_name = name
        img_full_dir = os.path.join("./images/small", file_name)
        xlsx_path = os.path.join(img_full_dir, file_name + ".xlsx")
        shadow_dict = self._load_shadow_dict("shadow_list.xlsx")

        wb = load_workbook(xlsx_path)
        print('Opened xlsx file successfully!(%s)'%xlsx_path)
        ws = wb.active

        shadow_id = ""
        # Current "<label>__<shadow>" stem; empty while no label is active.
        label_tmp = ''
        # Number of rows seen since the last labelled row.
        j = 0
        # Any of these in column B marks the image as one to skip.
        skip_list = ['x', 'xx', 'xxx']
        error_list = []
        # Plot numbers already seen in this workbook.
        all_labels = set()

        for row in range(1, len(ws['A']) + 1):
            origin_name = ws.cell(row=row, column=1).value
            label = ws.cell(row=row, column=2).value
            # Zero-padded row number, e.g. '00001-'.
            prefix = str(row).zfill(5) + '-'
            if label:
                label = str(label).strip()
                if label.lower() in skip_list:
                    ws.cell(row=row, column=3).value = prefix + 'SKIPPED.jpg'
                    print('%s is skipped!' % origin_name)
                    label_tmp = ''
                    continue
                if label != '':
                    # A repeated plot number is highlighted in red and logged.
                    if label in all_labels:
                        message = '重复!! 文件夹: %s, %d行地编%s之前已经存在.' % (file_name, row, label)
                        print(message)
                        ws.cell(row, 2).font = Font(color=self._RED)
                        error_list.append(message)
                    else:
                        ws.cell(row, 2).font = Font(color=self._BLACK)
                    all_labels.add(label)
                    j = 0
                    label_tmp = label
                    # Look up the shadow value by (folder name, plot number).
                    if (file_name, label) not in shadow_dict:
                        print("错误的ID", (file_name, label))
                        error_list.append("错误ID:"+str((file_name,label)))
                        shadow_id = '未找到'
                    else:
                        shadow_id = shadow_dict[(file_name, label)]
                    label_tmp += '__' + shadow_id
                    # e.g. 00034- + TW12__shadow135-770 + -LABEL.jpg
                    ws.cell(row=row, column=3).value = prefix + label_tmp + '-LABEL.jpg'
            # Rows after a labelled row inherit its label and shadow value.
            if j > 0 and label_tmp:
                new_name = '%s%s-0%d.jpg' % (prefix, label_tmp, j)
                # Column C is only an intermediate used to derive column E.
                ws.cell(row=row, column=3).value = new_name
                ws.cell(row=row, column=4).value = shadow_id
                ws.cell(row=row, column=5).value = new_name.split('__')[1]
                ws.cell(row=row, column=6).value = file_name
                ws.cell(row=row, column=7).value = label_tmp.split('__')[0]
            j += 1

        # Write the error log, or drop a stale one left by an earlier run.
        log_file = img_full_dir + "/" + file_name + "第三步LOG日志.txt"
        if error_list:
            with open(log_file, "w") as f:
                f.writelines(line + "\n" for line in error_list)
        elif os.path.exists(log_file):
            os.remove(log_file)

        ws.column_dimensions['C'].width = 40
        ws.column_dimensions['D'].width = 17
        ws.column_dimensions['E'].width = 27
        wb.save(xlsx_path)
        wb.close()
        print('%s数据处理完成!'%name+"\n")
if __name__ == "__main__":
    # Script entry point: generate the new names and report any failure as a
    # one-line message instead of a traceback.
    try:
        resize = Generate_new_names()
    except Exception as err:
        print("出错了:", err)
04_rename_all.py:修改图片名字放到目录/images/final/中,如果第五列中有重复的图片名字会变红色,若有错误会生成“第四步LOG日志.txt”文件。
#!/usr/bin/env python
# coding=utf-8
# 修改图片名字放到目录/images/final/中
import os
from openpyxl import load_workbook
from shutil import copyfile
from openpyxl.styles import Font
from openpyxl.styles import colors
class Rename():
    """Copy the resized images to ``./images/final/<name>`` under their new names.

    For every folder listed in ``file_list.txt`` that exists under
    ``./images``, the workbook ``./images/small/<name>/<name>.xlsx`` is read:
    column A holds the current file name and column E the new one. Rows whose
    column E is empty or contains ``未找到`` are ignored. A new name that
    occurs more than once is shown in red in column E and reported in
    ``<name>第四步LOG日志.txt``; only its first occurrence is copied.
    """

    # ARGB hex strings are passed to Font directly; the named constants
    # ``colors.RED`` / ``colors.BLACK`` are not available in every openpyxl
    # release.
    _RED = "00FF0000"
    _BLACK = "00000000"

    def __init__(self):
        """Read the folder names and process each folder found under ``./images``."""
        self.file_list = "file_list.txt"
        with open(self.file_list, "r") as f:
            file_name_list = f.read().split()
        for name in file_name_list:
            img_dir = "./images/" + name
            if os.path.exists(img_dir):
                self.main(name)

    def main(self, name):
        """Rename/copy the images of one folder according to its workbook.

        Args:
            name: Folder name; selects ``./images/small/<name>/<name>.xlsx``
                and the destination ``./images/final/<name>``.
        """
        print('开始重命名...')
        file_name = name
        img_full_dir = os.path.join("./images/small", file_name)
        xlsx_path = os.path.join(img_full_dir, file_name + ".xlsx")
        # Renamed copies go to their own directory; an empty value would
        # switch to renaming the files in place.
        copy_dir = "./images/final/" + name
        wb = load_workbook(xlsx_path)
        print('Workbook loaded successfully!')
        ws = wb.active
        if copy_dir:
            if not os.path.exists(copy_dir):
                os.makedirs(copy_dir)
            print('复制模式: copy to', copy_dir)
        else:
            print('Rename inplace mode.')

        count = 0
        # New names already used, to detect duplicates in column E.
        seen_targets = set()
        error_list = []
        try:
            for row in range(1, len(ws['A']) + 1):
                name_from = str(ws.cell(row=row, column=1).value)
                name_to = str(ws.cell(row=row, column=5).value)
                # Skip rows without a usable new name.
                if "未找到" in name_to or name_to == "None":
                    continue
                if not copy_dir:
                    os.rename(os.path.join(img_full_dir, name_from),
                              os.path.join(img_full_dir, name_to))
                    count += 1
                    print('%s renamed to %s inplace.' % (name_from, name_to))
                    continue
                if name_to in seen_targets:
                    message = "注意,small中%s表格第五列中第%s行有重复的图片名字"%(name,row)
                    print(message)
                    error_list.append(message)
                    ws.cell(row, 5).font = Font(color=self._RED)
                    continue
                seen_targets.add(name_to)
                copyfile(os.path.join(img_full_dir, name_from),
                         os.path.join(copy_dir, name_to))
                count += 1
                ws.cell(row, 5).font = Font(color=self._BLACK)
                print('%s renamed and copied to %s' % (name_from, copy_dir))
        finally:
            # Save once, after the loop, so that a red mark on the last
            # duplicate row is persisted too (and marks made before a failure
            # are not lost).
            if copy_dir:
                wb.save(xlsx_path)
        print('总共%d张图片重命名完成.' % count)

        # Write the error log, or drop a stale one left by an earlier run.
        # "w" (not append) so messages from earlier runs do not pile up.
        log_file = (copy_dir or img_full_dir) + "/" + name + "第四步LOG日志.txt"
        if error_list:
            with open(log_file, "w") as f:
                f.writelines(line + "\n" for line in error_list)
        elif os.path.exists(log_file):
            os.remove(log_file)
        wb.close()
        print('Workbook closed successfully!')
        print('Rename completed!'+"\n")
if __name__ == "__main__":
    # Script entry point: run the rename step and report any failure as a
    # one-line message instead of a traceback.
    try:
        resize = Rename()
    except Exception as err:
        print("出错了:", err)
05_merge_excel.py:新增功能,将第三步生成的表格中有用的数据(第4-7列)合并到一个总表total.xlsx中,存放于images目录下,若有错误会在images目录下生成“第五步LOG日志.txt”文件。
#!/usr/bin/env python
# coding=utf-8
# 最后合并每一个excel表格到总表:./images/total.xlsx
import os
from openpyxl import load_workbook
from openpyxl import Workbook
from openpyxl.styles import Font, Alignment
import time
class Merge_excel():
    """Merge columns D-G of every per-folder workbook into ``./images/total.xlsx``.

    For every folder listed in ``file_list.txt`` that exists under
    ``./images/small``, the rows of its workbook whose column D is filled are
    appended below a header row in one summary sheet.
    """

    def __init__(self):
        """Build the summary workbook from all listed folders and save it."""
        # Number of data rows already written below the header.
        self.length = 0
        self.wb = Workbook()
        self.ws = self.wb.active
        result_excel = "./images/total.xlsx"
        # Header row.
        header = ["shadw值", "图片名字", "棚号", "地编"]
        self.ws.append(header)
        self.file_list = "file_list.txt"
        with open(self.file_list, "r") as f:
            file_name_list = f.read().split()
        for name in file_name_list:
            img_dir = "./images/small/" + name
            if os.path.exists(img_dir):
                self.main(name, img_dir)
        # Column widths.
        self.ws.column_dimensions['A'].width = 18
        self.ws.column_dimensions['B'].width = 26
        self.ws.column_dimensions['C'].width = 9
        self.ws.column_dimensions['D'].width = 9
        # Keep the header visible while scrolling.
        self.ws.freeze_panes = 'A2'
        # Bold, centred header cells.
        for col in range(1, len(header) + 1):
            cell = self.ws.cell(row=1, column=col)
            cell.font = Font(bold=True)
            cell.alignment = Alignment(horizontal='center', vertical='center')
        self.wb.save(result_excel)
        print("以上数据全部处理完毕,表格存放于%s"%result_excel)

    def main(self, name, img_dir):
        """Append the useful rows of one folder's workbook to the summary sheet.

        Args:
            name: Folder name; ``<name>.xlsx`` is preferred when several
                workbooks are present, and the name is used in messages.
            img_dir: Directory that contains the folder's workbook.

        Raises:
            FileNotFoundError: If ``img_dir`` contains no ``.xlsx`` workbook.
        """
        # Ignore the "~$..." lock files Excel creates while a workbook is open.
        file_names = sorted(f for f in os.listdir(img_dir)
                            if f.lower().endswith('.xlsx') and not f.startswith('~$'))
        if not file_names:
            raise FileNotFoundError("%s中没有找到xlsx表格" % img_dir)
        expected = name + ".xlsx"
        chosen = expected if expected in file_names else file_names[0]
        local_excel_path = os.path.join(img_dir, chosen)

        wb0 = load_workbook(local_excel_path)
        ws0 = wb0.active
        print("打开表格%s成功."%local_excel_path)
        # Read columns D-G only, so a sheet with fewer (or more) used columns
        # does not break the row unpacking; rows without a value in column D
        # carry no result and are skipped.
        result_data = []
        for c4, c5, c6, c7 in ws0.iter_rows(min_col=4, max_col=7):
            if c4.value is not None:
                result_data.append([c4.value, c5.value, c6.value, c7.value])
        wb0.close()

        print("写入数据中,共%s行数据需要写入..."%len(result_data))
        # Row 1 is the header, so data starts at row ``self.length + 2``.
        for offset, data in enumerate(result_data):
            for col, value in enumerate(data, start=1):
                self.ws.cell(row=self.length + offset + 2, column=col, value=value)
        print("%s写入完毕!"%name + "\n")
        # The next workbook continues after the last row written here.
        self.length += len(result_data)
if __name__ == "__main__":
    # Script entry point: a successful merge removes any old step-5 log; a
    # failure is printed and written to that log instead.
    log_file = "./images/第五步LOG日志.txt"
    try:
        resize = Merge_excel()
        if os.path.exists(log_file):
            os.remove(log_file)
    except Exception as err:
        print("出错了:", err)
        if err:
            with open(log_file, "w") as log:
                log.write(str(err) + "\n")
首先:要将“file_list.txt”补充完整,作用是读取的文件夹名字,文件名字不可更改;
其次:将“shadow_list.xlsx”补充完整,作用是根据第二列和第三列的组合去匹配第四列的shadow值(第一列文件夹中文名、第二列文件夹非中文名字、第三列地编号、第四列唯一的shadow值、第五列上传完示范田后的田id),文件名字不可更改;
最后:按照py文件名字顺序依次执行,处理完02_create_xlsx_with_file_names后,需要添加有标签图片的地编号。