Images with no readable text are now renamed with a UUID.

This commit is contained in:
StarShine 2020-11-01 13:36:11 +01:00
parent 3f7f9fd958
commit f20e488518
1 changed file with 15 additions and 6 deletions

View File

@ -8,6 +8,7 @@ except ImportError:
import pytesseract import pytesseract
import os import os
import string import string
import uuid
namearray = [] namearray = []
indexnow = 0 indexnow = 0
@ -36,8 +37,7 @@ def namecleaner(filename):
filename = filename.replace("?","") filename = filename.replace("?","")
filename = filename.replace("*","") filename = filename.replace("*","")
################################## ##################################
filename = filename.replace(".","") # Get rid of extra dots outputname = filename.replace(".","") # Get rid of extra dots
outputname = filename+basename[len(basename)-4:len(basename)] # Get the extension of the file
print("outputnameis "+outputname) print("outputnameis "+outputname)
return outputname return outputname
@ -55,7 +55,7 @@ absolutebasepathout = os.path.abspath('.\ImagesOutput')
print("absolutebasepathout = "+absolutebasepathout) print("absolutebasepathout = "+absolutebasepathout)
# List all files in a directory using scandir() # List all files in a directory using scandir()
with os.scandir(basepathin) as entries: with os.scandir(absolutebasepathin) as entries:
for entry in entries: for entry in entries:
if entry.is_file(): if entry.is_file():
# Fill an array with the list # Fill an array with the list
@ -75,9 +75,18 @@ while indexnow < arraylength:
# Call "namecleaner" to get rid of forbiden characters, line breaks and spaces. # Call "namecleaner" to get rid of forbiden characters, line breaks and spaces.
cleanname = namecleaner(newname) cleanname = namecleaner(newname)
print("cleanname = "+cleanname) print("cleanname = "+cleanname)
if cleanname != "":
cleanname = cleanname + basename[len(basename)-4:len(basename)]
os.rename(absolutebasepathin+"\\"+basename, absolutebasepathout+"\\"+cleanname) os.rename(absolutebasepathin+"\\"+basename, absolutebasepathout+"\\"+cleanname)
print(basename+" is now renamed as "+cleanname) print(basename+" is now renamed as "+cleanname)
indexnow = indexnow + 1 else:
UUIDnow = str(uuid.uuid4())
cleanname = namecleaner(UUIDnow)
cleanname = cleanname + basename[len(basename)-4:len(basename)]
os.rename(absolutebasepathin+"\\"+basename, absolutebasepathout+"\\"+cleanname)
print(basename+" is now "+cleanname)
indexnow += 1
print("All images are given a name") print("All images are given a name")