Added dates
This commit is contained in:
parent
27ee15ba2d
commit
00be0b5ff4
1 changed file with 22 additions and 3 deletions
25
parser.py
25
parser.py
|
@@ -4,6 +4,7 @@
|
||||||
import sys
|
import sys
|
||||||
import requests
|
import requests
|
||||||
from html.parser import HTMLParser
|
from html.parser import HTMLParser
|
||||||
|
from datetime import datetime as dt
|
||||||
import caldav, icalendar
|
import caldav, icalendar
|
||||||
|
|
||||||
|
|
||||||
|
@@ -32,30 +33,48 @@ class GoogleSheetsCalParser(HTMLParser):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.inTable = False
|
self.inTable = False
|
||||||
self.inCell = False
|
self.inCell = False
|
||||||
|
self.inDate = False
|
||||||
self.row = -1
|
self.row = -1
|
||||||
self.column = -1
|
self.column = -1
|
||||||
|
self.rowspan = 0
|
||||||
|
self.date = 0
|
||||||
HTMLParser.__init__(self)
|
HTMLParser.__init__(self)
|
||||||
|
|
||||||
def handle_starttag(self, tag, attrs):
    """Update the table position and cell flags for an opening tag.

    Outside a table only ``tbody`` is acted on: it sets ``inTable``.
    Inside a table, ``tr`` advances ``row`` and resets ``column``, while
    ``td`` advances ``column`` (by ``colspan`` when that attribute is
    present, otherwise by 1) and then sets either ``inCell`` or ``inDate``
    depending on the current row index.

    Args:
        tag: Tag name as passed by ``HTMLParser``.
        attrs: List of ``(name, value)`` attribute pairs.

    Raises:
        ValueError: If a ``colspan`` or ``rowspan`` value is not an
            integer literal.
        TypeError: If ``colspan`` or ``rowspan`` is present without a
            value (``HTMLParser`` reports such attributes as ``None``).
    """
    if not self.inTable:
        if tag == 'tbody':
            self.inTable = True
        return

    if tag == 'tr':
        self.row += 1
        self.column = -1
        return

    if tag != 'td':
        return

    cell_attrs = dict(attrs)
    self.column += int(cell_attrs['colspan']) if 'colspan' in cell_attrs else 1

    # NOTE(review): the constants 3, 2 and 12 presumably encode the sheet
    # layout (leading header rows, then a repeating 12-row group) --
    # confirm against the actual spreadsheet.
    offset = (self.row - 2) % 12
    if self.row >= 3 and offset >= 2:
        self.inCell = True
        # Convert to int so rowspan has the same type whether or not the
        # attribute is present (colspan is converted the same way).
        self.rowspan = int(cell_attrs['rowspan']) if 'rowspan' in cell_attrs else 1
    elif offset == 1:
        self.inDate = True
|
||||||
|
|
||||||
def handle_endtag(self, tag):
    """Clear the state flag(s) that correspond to a closing tag.

    ``td`` clears both ``inCell`` and ``inDate``; ``tbody`` clears
    ``inTable``.  Every other tag is ignored.
    """
    if tag == 'tbody':
        self.inTable = False
        return
    if tag != 'td':
        return
    # A cell has ended, whichever kind it was.
    self.inCell = False
    self.inDate = False
|
||||||
|
|
||||||
def handle_data(self, data):
    """Consume a text chunk: parse it as a date or report it as cell text.

    While ``inDate`` is set, the text is stripped of surrounding
    whitespace, parsed with the ``%d-%b-%y`` format, stored in
    ``self.date`` and printed.  Whitespace-only chunks in a date cell are
    skipped and leave ``self.date`` untouched.  Otherwise, while ``inCell``
    is set, any text other than ``''`` or ``'-'`` is printed together with
    the current row, column and rowspan.

    Args:
        data: Text chunk as passed by ``HTMLParser``.

    Raises:
        ValueError: If non-blank text in a date cell does not match
            ``%d-%b-%y``.
    """
    if self.inDate:
        # strptime rejects surrounding whitespace, so strip the chunk first
        # and skip chunks that are blank.
        text = data.strip()
        if not text:
            return
        self.date = dt.strptime(text, '%d-%b-%y')
        print(self.date)
    elif self.inCell and data not in ('', '-'):
        print(data, self.row, self.column, self.rowspan)
|
||||||
|
|
||||||
|
|
||||||
# Getting the Google Sheet
|
# Getting the Google Sheet
|
||||||
|
|
Reference in a new issue