oracle:fixhtml
Differences
This shows you the differences between two versions of the page.
| Next revision | Previous revision | ||
| oracle:fixhtml [2010/09/13 15:07] – creado rlunaro | oracle:fixhtml [2024/10/05 17:05] (current) – rlunaro | ||
|---|---|---|---|
| Line 1: | Line 1: | ||
| - | ====== Fix or Sanitize HTML code from Word ====== | + | ====== Fix or Sanitize HTML ====== |
| Yes: I've found the silver bullet for those of you who are looking for a function that cleans or sanitizes HTML code, especially if it comes from a cut-and-paste operation from Word. | Yes: I've found the silver bullet for those of you who are looking for a function that cleans or sanitizes HTML code, especially if it comes from a cut-and-paste operation from Word. | ||
| Line 17: | Line 17: | ||
| Wipes out all the garbage that is in the HTML code, leaving it --more or less-- " | Wipes out all the garbage that is in the HTML code, leaving it --more or less-- " | ||
| - | < | + | < |
| - | /* | + | |
| - | */ | + | |
| - | create or replace function strip_html(dirty in clob, | + | |
| to_cvs in number default 0) | to_cvs in number default 0) | ||
| return clob is out clob ; | return clob is out clob ; | ||
| Line 296: | Line 295: | ||
| if to_cvs = 2 then | if to_cvs = 2 then | ||
| -- sanitize (not clean) the html | -- sanitize (not clean) the html | ||
| - | + | ||
| + | -- clean the tag <? | ||
| + | out := regexp_replace(out, | ||
| + | -- clean the tags <img whatever> | ||
| + | out := regexp_replace(out, | ||
| -- clean comments | -- clean comments | ||
| out := regexp_replace(out,'< | out := regexp_replace(out,'< | ||
| Line 309: | Line 312: | ||
| -- clean "class inside tags" | -- clean "class inside tags" | ||
| out := regexp_replace(out,' | out := regexp_replace(out,' | ||
| + | -- clean " | ||
| + | out := regexp_replace(out,' | ||
| -- clean namespaces <o:p> </ | -- clean namespaces <o:p> </ | ||
| out := regexp_replace(out, | out := regexp_replace(out, | ||
| Line 354: | Line 359: | ||
| return(out); | return(out); | ||
| end strip_html; | end strip_html; | ||
| + | |||
oracle/fixhtml.1284390424.txt.gz · Last modified: 2022/12/02 21:02 (external edit)
