; rss.vnm
; Test parsing an RSS feed with the text analyser.
;
; Illustrates fetching a simple HTTP URL and use of the
; text analyser V2.
; For Venom 2
; 2010 11 09 : updated as URL of BBC news feed has changed

; Create the global objects used by main: the Ethernet object, the TCP
; connection (tcp), a text analyser (pa) constructed on that connection,
; and a String(100) (s) used as the working buffer for headers and titles.
To init
    New Ethernet
    Make tcp TCProt
    Make pa TextAnalyser(tcp, 0)
    Make s String(100)
End

; Fetch the BBC news RSS feed over HTTP and print the title of every item.
;
; Note how the URL
;     "http://feeds.bbci.co.uk/news/rss.xml?edition=uk"
; is broken down:
;   - HTTP uses port 80, which we use when opening the connection.
;   - "feeds.bbci.co.uk" is the hostname, which we use when opening the
;     connection and again in a "host:" header after the GET request line.
;   - "/news/rss.xml?edition=uk" is the path we specify in the GET request.
To main
    If tcp.Open("feeds.bbci.co.uk", 80)
    [
        Print "http connection opened", CR

        ; request line, host header, then an empty line to finish the request
        Print To tcp, "GET /news/rss.xml?edition=uk HTTP/1.1",CR, "host: feeds.bbci.co.uk",CR, CR

        ; echo the response headers; an empty line marks the end of the headers
        Print "=headers=", CR
        While tcp.Get(s) > 0
        [
            Print s, CR
        ]
        Print "==", CR

        pa.Reset

        ; Every title we want to display is a <title> element inside an
        ; <item> element.  There are a couple of <title> elements at the
        ; beginning of the feed that do not apply, so each search for
        ; <title> is only made after an <item> has been found.
        tcp.TimeOut := 5000
        While pa.Find("<item>", 0)
        [
            If pa.Find("<title>", 0)
            [
                pa.Get(s, "<")          ; presumably reads the title text up to the next '<' - confirm
                convert_entity(s)
                Print "Title: ", s, CR
            ]
        ]

        tcp.Close
        tcp.Reset
    ]
    Else
    [
        Print "tcp open failed, status ", tcp.Status:1, CR
    ]

    ; NOTE(review): this Reset is outside the Else, so it runs after either
    ; branch (a second time on the success path) - confirm that is intended.
    tcp.Reset
End

; Table for converting certain XML/HTML entity codes back into characters.
; Entries come in pairs: the first entry of each pair is the name that
; appears between '&' and ';' in the XML/HTML, the second is the
; replacement text, e.g. "&amp;" represents '&'.
; NOTE(review): the "quot" pair is the only one written with a comma
; between its entries - confirm this is intentional.
Array entities("", 8)
    "amp"   "&"
    "apos"  "'"
    "pound" "£"
    "quot", "\""
End

; Convert XML/HTML character entity codes in a string.
;
; s : the string to convert; it is emptied and refilled with the
;     converted text, so the caller sees the result in the same object.
;
; Each '&' read from s triggers a lookup of the following name in the
; entities table.  When the lookup returns something other than -1 the
; table entry after the matched one is written to the output; when it
; returns -1 nothing is written for that entity.  Every other character
; is copied through unchanged.
To convert_entity(s)
    Local ch                                ; character just read from s
    Local idx                               ; result of the entity-name lookup
    AutoDestruct
    Local converted := New String(100)      ; holder for converted string
    Local scanner := New TextAnalyser(s, 1)

    s.Reset
    converted.Empty

    While s.Queue
    [
        ch := scanner.Get
        If ch = '&'
        [
            ; this uses the TextAnalyser "array of names" feature
            idx := scanner.Get(entities, ";")
            If idx <> -1
            [
                Print To converted, entities.(idx + 1)
            ]
        ]
        Else
        [
            converted.Put(ch)
        ]
    ]

    ; copy converted string back to original
    s.Empty
    s.Put(converted)
End