#endif
#ifndef REPLACEALL_H
#define REPLACEALL_H
#include <string>
using std::string;

// Replaces every non-overlapping occurrence of `from` in `context` with `to`,
// scanning left to right and modifying `context` in place.
//
// Text inserted by a replacement is never rescanned, so a `to` that itself
// contains `from` (e.g. "a" -> "aa") cannot cause runaway growth.
//
// An empty `from` matches at every position and would never terminate, so it
// is treated as "nothing to replace" and `context` is left untouched.
//
// Returns a reference to `context` so calls can be chained.
//
// Declared `inline` because the definition lives in a header: without it,
// including this file from two translation units breaks the one-definition
// rule at link time.
inline std::string& replaceAll(std::string& context,
                               const std::string& from,
                               const std::string& to) {
  if (from.empty()) {
    return context;
  }
  std::string::size_type lookHere = 0;
  std::string::size_type foundHere;
  while ((foundHere = context.find(from, lookHere)) != std::string::npos) {
    context.replace(foundHere, from.size(), to);
    // Resume just past the inserted text so it is not matched again.
    lookHere = foundHere + to.size();
  }
  return context;
}

#endif  // REPLACEALL_H
#include <cstddef>
#include <cstdlib> #include <fstream> #include <iostream> #include <sstream> #include <stdexcept> #include <string> #include "ReplaceAll.h" #include "require.h" using namespace std; string& stripHTMLTags(string& s) throw(runtime_error)//-->warning { size_t leftPos;while((leftPos=s.find('<'))!=string::npos){ size_t rightPos=s.find('>',leftPos+1);if(rightPos==string::npos){ ostringstream msg;msg<<"Incomplete HTML tag starting in position "<<leftPos;throw runtime_error(msg.str());}s.erase(leftPos,rightPos-leftPos+1);}//移除所有的特殊HTML字符replaceAll(s,"<","<");replaceAll(s,">",">");replaceAll(s,"&","&");replaceAll(s," "," ");//Etc...return s; } int main(int argc,char* argv[1]) { requireArgs(argc,1,"usage: HTMLStripper2 InputFile");//这种方法应该在命令行(进入当前可执行文件目录后)中输入删除HTML标记.exe test1.htmlifstream in(argv[1]);assure(in,argv[1]);//读取整个文件到字符串,然后分割字符串ostringstream ss;ss<<in.rdbuf();try{ string s=ss.str();cout<<stripHTMLTags(s)<<endl;return EXIT_SUCCESS;}catch(runtime_error& x){ cout<<x.what()<<endl;return EXIT_FAILURE;}system("pause");return 0; }