|
@@ -5,6 +5,7 @@ from pdfminer.pdfpage import PDFPage
|
|
|
from io import StringIO
|
|
from io import StringIO
|
|
|
import re
|
|
import re
|
|
|
import csv
|
|
import csv
|
|
|
|
|
+import glob, os
|
|
|
|
|
|
|
|
def convert_pdf_to_txt(path):
|
|
def convert_pdf_to_txt(path):
|
|
|
rsrcmgr = PDFResourceManager()
|
|
rsrcmgr = PDFResourceManager()
|
|
@@ -31,17 +32,22 @@ def convert_pdf_to_txt(path):
|
|
|
|
|
|
|
|
regex = r"^Tour Nr:\n\n(\d*)\n\nAbfahrt:.*?StellPl.Maut.*?^(\d*,\d*|0)\sKM$"
|
|
regex = r"^Tour Nr:\n\n(\d*)\n\nAbfahrt:.*?StellPl.Maut.*?^(\d*,\d*|0)\sKM$"
|
|
|
|
|
|
|
|
-out = convert_pdf_to_txt("./vp.pdf")
|
|
|
|
|
-
|
|
|
|
|
-matches = re.finditer(regex, out, re.MULTILINE | re.DOTALL)
|
|
|
|
|
-with open('eggs.csv', 'w', newline='') as csvfile:
|
|
|
|
|
- spamwriter = csv.writer(csvfile, delimiter=';',
|
|
|
|
|
- quotechar='|', quoting=csv.QUOTE_MINIMAL)
|
|
|
|
|
-
|
|
|
|
|
- i = 1
|
|
|
|
|
-
|
|
|
|
|
- for matchNum, match in enumerate(matches, start=1):
|
|
|
|
|
- ##print ("Match {matchNum} was found at {start}-{end}: {match}".format(matchNum = matchNum, start = match.start(), end = match.end(), match = match.group()))
|
|
|
|
|
- spamwriter.writerow([str("=\"" + match.group(1) + "\"")] + [str(match.group(2)).replace(",",".")])
|
|
|
|
|
- print ("Tour " + str(i) + "," + str(match.group(1)) + "," + str(match.group(2)).replace(",","."))
|
|
|
|
|
- i = i+1
|
|
|
|
|
|
|
+os.chdir("./")
|
|
|
|
|
+print("moin")
|
|
|
|
|
+for file in glob.glob("*.pdf"):
|
|
|
|
|
+ print("Beginne mit " + file + " alles guddes!")
|
|
|
|
|
+ out = convert_pdf_to_txt(file)
|
|
|
|
|
+
|
|
|
|
|
+ matches = re.finditer(regex, out, re.MULTILINE | re.DOTALL)
|
|
|
|
|
+ with open(file + ".csv", 'w', newline='') as csvfile:
|
|
|
|
|
+ spamwriter = csv.writer(csvfile, delimiter=';',
|
|
|
|
|
+ quotechar='|', quoting=csv.QUOTE_MINIMAL)
|
|
|
|
|
+
|
|
|
|
|
+ i = 1
|
|
|
|
|
+
|
|
|
|
|
+ for matchNum, match in enumerate(matches, start=1):
|
|
|
|
|
+ ##print ("Match {matchNum} was found at {start}-{end}: {match}".format(matchNum = matchNum, start = match.start(), end = match.end(), match = match.group()))
|
|
|
|
|
+ spamwriter.writerow([str("=\"" + match.group(1) + "\"")] + [str(match.group(2)).replace(",",".")])
|
|
|
|
|
+ print ("Tour " + str(i) + "," + str(match.group(1)) + "," + str(match.group(2)).replace(",","."))
|
|
|
|
|
+ i = i+1
|
|
|
|
|
+ print("ok")
|