From 00be0b5ff449f0f50cb1548bd6e76e8f8953afc6 Mon Sep 17 00:00:00 2001 From: edpibu Date: Sun, 20 Sep 2020 21:51:41 +0200 Subject: [PATCH] Added dates --- parser.py | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/parser.py b/parser.py index fee8b61..0b79251 100755 --- a/parser.py +++ b/parser.py @@ -4,6 +4,7 @@ import sys import requests from html.parser import HTMLParser +from datetime import datetime as dt import caldav, icalendar @@ -32,30 +33,48 @@ class GoogleSheetsCalParser(HTMLParser): def __init__(self): self.inTable = False self.inCell = False + self.inDate = False self.row = -1 self.column = -1 + self.rowspan = 0 + self.date = 0 HTMLParser.__init__(self) def handle_starttag(self, tag, attrs): + dAttrs = dict(attrs) + if self.inTable: if tag == 'tr': self.row += 1 self.column = -1 elif tag == 'td': - self.column += 1 - if not (self.row < 3 or (self.row - 2) % 11 < 2): + if 'colspan' in dAttrs.keys(): + self.column += int(dAttrs['colspan']) + else: self.column += 1 + + if not (self.row < 3 or (self.row - 2) % 12 < 2): self.inCell = True + if 'rowspan' in dAttrs.keys(): + self.rowspan = dAttrs['rowspan'] + else: self.rowspan = 1 + elif (self.row - 2) % 12 == 1: + self.inDate = True elif tag == 'tbody': self.inTable = True def handle_endtag(self, tag): if tag == 'td': self.inCell = False + self.inDate = False elif tag == 'tbody': self.inTable = False def handle_data(self, data): - ... + if self.inDate: + self.date = dt.strptime(data, '%d-%b-%y') + print(self.date) + elif self.inCell and data not in ['', '-']: + print(data, self.row, self.column, self.rowspan) # Getting the Google Sheet