from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
from pdfminer.converter import TextConverter
from pdfminer.layout import LAParams
from pdfminer.pdfpage import PDFPage
from io import StringIO
import re
import csv
import glob, os

def convert_pdf_to_txt(path):
    rsrcmgr = PDFResourceManager()
    retstr = StringIO()
    codec = 'utf-8'
    laparams = LAParams()
    device = TextConverter(rsrcmgr, retstr, codec=codec, laparams=laparams)
    fp = open(path, 'rb')
    interpreter = PDFPageInterpreter(rsrcmgr, device)
    password = ""
    maxpages = 0
    caching = True
    pagenos=set()

    for page in PDFPage.get_pages(fp, pagenos, maxpages=maxpages, password=password,caching=caching, check_extractable=True):
        interpreter.process_page(page)

    text = retstr.getvalue()

    fp.close()
    device.close()
    retstr.close()
    return text

regex = r"^Tour Nr:\n\n(\d*)\n\nAbfahrt:.*?StellPl.Maut.*?^(\d*,\d*|0)\sKM$"

os.chdir("./")
print("moin")
for file in glob.glob("*.pdf"):
    print("Beginne mit " + file + " alles guddes!")
    out = convert_pdf_to_txt(file)

    matches = re.finditer(regex, out, re.MULTILINE | re.DOTALL)
    with open(file + ".csv", 'w', newline='') as csvfile:
        spamwriter = csv.writer(csvfile, delimiter=';',
                                quotechar='|', quoting=csv.QUOTE_MINIMAL)

        i = 1

        for matchNum, match in enumerate(matches, start=1):
            ##print ("Match {matchNum} was found at {start}-{end}: {match}".format(matchNum = matchNum, start = match.start(), end = match.end(), match = match.group()))
            spamwriter.writerow([str("=\"" + match.group(1) + "\"")] + [str(match.group(2)).replace(",",".")])
            print ("Tour " + str(i) + "," + str(match.group(1)) + "," + str(match.group(2)).replace(",","."))
            i = i+1
    print("ok")