This repository has been archived on 2020-10-14. You can view files and clone it, but cannot push or open issues or pull requests.
edt-parser/parser.py

170 lines
5.8 KiB
Python
Raw Normal View History

2020-09-20 20:53:24 +02:00
#!/usr/bin/env python
#-*- encoding: utf-8
2020-09-20 21:11:48 +02:00
import sys
from progressbar import progressbar
2020-09-20 20:53:24 +02:00
import requests
from html.parser import HTMLParser
2020-09-20 22:11:47 +02:00
from datetime import datetime as dt, time
import caldav
from icalendar import Calendar, Event
import pytz
2020-09-20 20:53:24 +02:00
2020-09-20 21:11:48 +02:00
# Constants definition
# Address of the published Google Sheets HTML page; it is downloaded with
# requests.get() and the response text is fed to GoogleSheetsCalParser.
URL = "https://docs.google.com/spreadsheets/u/0/d/e/2PACX-1vQ9yzFLr5mXbIZVK3ucdUZuScAbLoCyPqzHr-5V0aYeCFEz7LuidPdk_EnkkJT-zjemzQQHaKvpeXW2/pubhtml/sheet?headers=false&gid=1619638924"
2020-09-20 21:23:23 +02:00
# Slot boundaries of a day, as ISO 'HH:MM' strings, one pair per boundary.
# For an event occupying slots [i, i + rowspan):
#   * its start time is the SECOND item of TIMETABLE[i];
#   * its end time is the FIRST item of TIMETABLE[i + rowspan].
# The two items differ only where a boundary is followed by a gap
# (10:00 -> 10:15 and 15:30 -> 15:45).
TIMETABLE = [
    ('08:00', '08:00'),
    ('09:00', '09:00'),
    ('10:00', '10:15'),
    ('11:15', '11:15'),
    ('12:15', '12:15'),
    ('13:00', '13:00'),
    ('13:30', '13:30'),
    ('14:30', '14:30'),
    ('15:30', '15:45'),
    ('16:45', '16:45'),
    ('17:45', '17:45'),
]

# Base address handed to caldav.DAVClient.
DAVURL = "https://cal.edgarpierre.fr"
2020-09-20 22:43:25 +02:00
# Address of the specific calendar collection; passed as `url` to
# caldav.Calendar when the parser is constructed.
CALURL = "https://cal.edgarpierre.fr/edpibu/817427eb-be7c-4540-872a-dfb723a205a6/"
2020-09-20 21:11:48 +02:00
class GoogleSheetsCalParser(HTMLParser):
    """
    Parse a published Google Sheets timetable and mirror it to a CalDAV
    calendar.

    The parser walks the ``<tbody>`` of the page and tracks the row/column
    of every ``<td>``, taking ``colspan`` and ``rowspan`` into account.

    Row layout (derived from the index arithmetic used below):

    * rows 0-2 are ignored;
    * from row 2 on, rows are handled in bands of 12: position 0 of a band
      is ignored, position 1 holds one date per column (``%d-%b-%y``), and
      positions 2-11 are time slots indexed into ``TIMETABLE``.

    Every non-empty slot cell becomes an event whose UID is
    ``"<row>x<column>"``.  The event is created or updated on the calendar
    when its ``</td>`` is reached; when ``</tbody>`` is reached, every
    calendar event whose UID was not seen during this run is deleted.

    Progress messages are written with ``print``.
    """

    def __init__(self):
        super().__init__()

        # Position flags maintained by the tag handlers.
        self.inTable = False    # inside <tbody>
        self.inCell = False     # inside a time-slot <td>
        self.inDate = False     # inside a date <td>
        self.inEvent = False    # current cell produced event text

        # Grid coordinates of the current cell.
        self.row = -1
        self.column = -1
        self.nextCol = -1       # column the next <td> of this row will get
        self.rowspan = 0        # rowspan of the current cell

        # column -> [width in columns, remaining rows] for rowspan cells.
        self.multirows = {}
        # column -> datetime.date read from the most recent date row.
        self.date = {}

        # Event under construction; uid 0 is a sentinel that never matches
        # a "<row>x<column>" string.
        self.event = Event()
        self.event['uid'] = 0
        # UIDs of all events seen in the sheet during this run.
        self.uids = []

        # Credentials come from a local `priv` module exposing USERNAME and
        # PASSWORD; it is imported here, at construction time.
        import priv
        client = caldav.DAVClient(url=DAVURL, username=priv.USERNAME,
                                  password=priv.PASSWORD)
        self.calendar = caldav.Calendar(client=client, url=CALURL)

    def handle_starttag(self, tag, attrs):
        """Track table position; classify each <td> as date or slot cell."""
        if not self.inTable:
            if tag == 'tbody':
                self.inTable = True
            return

        if tag == 'tr':
            self.row += 1
            self.nextCol = -1
        elif tag == 'td':
            self._start_cell(dict(attrs))

    def _start_cell(self, attrs):
        """Assign a column to the opening <td> and set the cell flags."""
        # Skip columns still covered by a rowspan cell from an earlier row.
        while (self.nextCol in self.multirows
               and self.multirows[self.nextCol][1] > 0):
            self.nextCol += self.multirows[self.nextCol][0]
        self.column = self.nextCol

        if 'colspan' in attrs:
            self.nextCol += int(attrs['colspan'])
        else:
            self.nextCol += 1

        band_position = (self.row - 2) % 12
        if self.row >= 3 and band_position >= 2:
            # Time-slot cell.
            self.inCell = True
            if 'rowspan' in attrs:
                self.rowspan = int(attrs['rowspan'])
                self.multirows[self.column] = [self.nextCol - self.column,
                                               self.rowspan]
            else:
                self.rowspan = 1
        elif band_position == 1:
            # Date row of the band.
            self.inDate = True

    def handle_endtag(self, tag):
        """Flush the current event on </td>; clean up on </tr>, </tbody>."""
        if tag == 'td':
            self.inCell = False
            self.inDate = False
            if self.inEvent:
                self.inEvent = False
                self._sync_event()
        elif tag == 'tr':
            # One more row consumed for every tracked rowspan cell.
            for span in self.multirows.values():
                span[1] -= 1
        elif tag == 'tbody':
            self.inTable = False
            self._prune_stale_events()

    @staticmethod
    def _first_vevent(data):
        """Return the first VEVENT found in iCalendar text, or None."""
        # Searching by component name avoids relying on the VEVENT sitting
        # at a fixed index among the calendar's subcomponents.
        vevents = Calendar.from_ical(data).walk('VEVENT')
        return vevents[0] if vevents else None

    @staticmethod
    def _same_event(stored, fresh):
        """Tell whether summary, dtstart and dtend match between events."""
        if stored['summary'] != fresh['summary']:
            return False
        return all(stored[key].to_ical() == fresh[key].to_ical()
                   for key in ('dtstart', 'dtend'))

    def _sync_event(self):
        """Create or update the current event on the CalDAV calendar."""
        cal = Calendar()
        cal.add('prodid', '-//edpibu//edt-parser//FR')
        cal.add('version', '2.0')
        cal.add_component(self.event)

        uid = self.event['uid']
        self.uids.append(uid)
        try:
            existing = self.calendar.event_by_uid(uid)
        except caldav.lib.error.NotFoundError:
            existing = None

        if not existing:
            print(f'New event on {self.event.decoded("dtstart").isoformat()}')
            self.calendar.save_event(cal.to_ical().decode('utf-8'))
            return

        stored = self._first_vevent(existing.data)
        # A stored object without any VEVENT is treated as changed and
        # overwritten.
        if stored is not None and self._same_event(stored, self.event):
            return
        print(f'Change on {self.event.decoded("dtstart").isoformat()}')
        existing.data = cal.to_ical().decode('utf-8')
        existing.save()

    def _prune_stale_events(self):
        """Delete calendar events whose UID was not seen in the sheet."""
        for remote in self.calendar.events():
            stored = self._first_vevent(remote.data)
            if stored is None:
                # Not an event we could have created; leave it alone.
                continue
            if stored['uid'] not in self.uids:
                remote.delete()
                print(f'Deleted event on {stored.decoded("dtstart").isoformat()}')

    def handle_data(self, data):
        """Record a column date, or start/extend the event of a slot cell."""
        if self.inDate:
            self.date[self.column] = dt.strptime(data, '%d-%b-%y').date()
            return
        if not self.inCell or data in ('', '-'):
            return

        self.inEvent = True
        uid = f'{self.row}x{self.column}'
        if self.event['uid'] == uid:
            # Further text chunk of the same cell: extend the summary.
            self.event['summary'] = f'{self.event["summary"]} {data}'
            return

        slot = (self.row - 2) % 12 - 2
        starts_at = time.fromisoformat(TIMETABLE[slot][1])
        ends_at = time.fromisoformat(TIMETABLE[slot + self.rowspan][0])
        day = self.date[self.column]
        # pytz zones must be attached with localize(); passing them as
        # tzinfo to datetime.combine() yields the zone's LMT offset.
        paris = pytz.timezone('Europe/Paris')

        event = Event()
        event.add('uid', uid)
        event.add('summary', data)
        # RFC 5545 requires DTSTAMP to be expressed in UTC.
        event.add('dtstamp', dt.now(pytz.utc))
        event.add('dtstart', paris.localize(dt.combine(day, starts_at)))
        event.add('dtend', paris.localize(dt.combine(day, ends_at)))
        self.event = event
2020-09-20 20:53:24 +02:00
2020-09-20 20:55:27 +02:00
# Getting the Google Sheet
# requests raises its own exception hierarchy (requests.exceptions.*), which
# does not derive from the builtin ConnectionError, so catch the requests
# base class. The timeout keeps an unresponsive server from hanging the run.
try:
    r = requests.get(URL, timeout=30)
except requests.exceptions.RequestException as e:
    print(e)
    sys.exit(1)

if r.status_code != 200:
    print(f'Status Code {r.status_code}; could not continue.')
    sys.exit(1)

# Parsing the Sheet
# Constructing the parser sets up the CalDAV client; feeding the page
# creates, updates and deletes calendar events as a side effect.
calParser = GoogleSheetsCalParser()
calParser.feed(r.text)
# Force processing of any data still buffered by the HTML parser.
calParser.close()