|
把分给我吧, 正好写过一个过滤的应用
#include "stdafx.h"
#include <atlfile.h>   // ATL::CAtlFile, ATL::CAtlFileMapping
#include <conio.h>     // _getch
#include <shlwapi.h>   // StrStrA/W, StrStrIA/W, StrRStrIA/W (link with shlwapi.lib)
#include <stdio.h>     // printf
#include <string.h>    // memmove, memset
#include <tchar.h>     // _tmain, _TCHAR, _T
#pragma warning(disable:4244 4267)
////////////////////////////////////////////////////////////////////////////////////////////////////////
// Forward declarations. Default template arguments are supplied on the
// definitions below, so they must not be repeated here.
template <bool t_fMatch> struct Str_LookupA;     // substring lookup policy for ANSI strings
template <bool t_fMatch> struct Str_LookupW;     // substring lookup policy for UNICODE strings
template <typename _RT> struct Str_Filter_Word;  // filter framework: removes only the given words
template <typename _RT> struct Str_Filter_Line;  // filter framework: removes whole lines containing the given words
////////////////////////////////////////////////////////////////////////////////////////////////////////
- //
- // 模板参数 t_fMatch, 指明查找字串是否区分大小写。
- // t_fMatch = true,查找函数将不区分大小写
- // t_fMatch = false, 默认方式,按照输入查找字串进行匹配
- //
- template struct Str_LookupA
- {
- enum { TYPE_SIZE = sizeof(CHAR)};
- typedef CHAR* _Type;
- typedef const CHAR* _constType;
- typedef _Type _ReturnType;
- _ReturnType operator()(_Type pSrc,_constType pSearch)
- {
- return t_fMatch ? StrStrIA(pSrc,pSearch) : StrStrA(pSrc,pSearch);
- }
- _ReturnType LineStart(_Type pSrc,_Type pLast)
- {
- _Type pStart = StrRStrIA(pSrc,pLast,"\r\n");
- return pStart ? pStart + 2 : pSrc;
- }
- _ReturnType LineEnd(_Type pSrc)
- {
- _Type pEnd = StrStrA(pSrc,"\r\n");
- return pEnd ? pEnd + 2 : NULL;
- }
- DWORD Length(_constType pStr)
- {
- return lstrlenA(pStr);
- }
- };
- template struct Str_LookupW
- {
- enum { TYPE_SIZE = sizeof(WCHAR)};
- typedef WCHAR* _Type;
- typedef _Type _ReturnType;
- typedef const WCHAR* _constType;
- _ReturnType operator()(_Type pSrc, _constType pSearch)
- {
- return t_fMatch ? StrStrIW(pSrc,pSearch) : StrStrW(pSrc,pSearch);
- }
- _ReturnType LineStart(_Type pSrc,_Type pLast)
- {
- _Type pStart = StrRStrIW(pSrc,pLast,L"\r\n");
- return pStart ? pStart + 2 : pSrc;
- }
- _ReturnType LineEnd(_Type pSrc)
- {
- _Type pEnd = StrStrW(pSrc,L"\r\n");
- return pEnd ? pEnd + 2 : NULL;
- }
- DWORD Length(_constType pStr)
- {
- return lstrlenW(pStr);
- }
- };
- template >
- struct Str_Filter_Word
- {
- typedef typename _RT::_Type _Type;
- typedef typename _RT::_constType _constType;
- typedef typename _RT::_ReturnType _ReturnType;
- _ReturnType operator()(_Type pBuffer, DWORD cbSize, _constType *pWord, DWORD cbCount, DWORD* pcbRemoved = NULL)
- {
- ATLASSERT(pBuffer && pWord && cbSize && cbCount);
- _RT Str_Lookup;
- DWORD dwRemoved = 0;
- DWORD cbStart = 0;
- DWORD cbEnd = cbSize;
- for (DWORD i = 0; i < cbCount; i++)
- {
- _Type pStart = pBuffer;
- _Type pEnd = pBuffer + cbEnd;
- _constType pSearch = pWord[i];
- DWORD cbLength = Str_Lookup.Length(pSearch);
- if (!pSearch)
- break;
- for (;;)
- {
- _Type p = Str_Lookup(pStart,pSearch);
- if (!p) break;
- dwRemoved++;
- _Type pNext = p + cbLength;
- if (pNext && pNext <=pEnd)
- {
- cbStart = p - pBuffer;
- memmove(p,pNext,_RT::TYPE_SIZE * (cbEnd - cbStart));
- pStart = p;
- cbEnd -= cbLength;
- }
- else
- {
- memset(pStart,0, _RT::TYPE_SIZE * (cbEnd - cbStart));
- break;
- }
- }
- }
- if (pcbRemoved) *pcbRemoved = dwRemoved;
- return pBuffer;
- }
- _ReturnType operator()(_Type pBuffer, DWORD cbSize, _constType pWord, DWORD* pcbRemoved = NULL)
- {
- _constType pWords[] = {pWord,0};
- return operator()(pBuffer,cbSize,pWords,1,pcbRemoved);
- }
- };
- template >
- struct Str_Filter_Line
- {
- typedef typename _RT::_Type _Type;
- typedef typename _RT::_constType _constType;
- typedef typename _RT::_ReturnType _ReturnType;
- _ReturnType operator()(_Type pBuffer, DWORD cbSize, _constType *pWord, DWORD cbCount, DWORD* pcbRemoved = NULL)
- {
- ATLASSERT(pBuffer && pWord && cbSize && cbCount);
- _RT Str_Lookup;
- DWORD dwRemoved = 0;
- DWORD cbStart = 0;
- DWORD cbEnd = cbSize;
- for (DWORD i = 0; i < cbCount; i++)
- {
- _Type pStart = pBuffer;
- _Type pEnd = pBuffer + cbEnd;
- _constType pSearch = pWord[i];
- if (!pSearch) break;
- for (;;)
- {
- _Type p = Str_Lookup(pStart,pSearch);
- if (!p) break;
- dwRemoved++;
- pStart = Str_Lookup.LineStart(pBuffer,p);
- _Type pLineEnd = Str_Lookup.LineEnd(p);
- if (pLineEnd)
- {
- cbStart = pStart - pBuffer;
- memmove(pStart,pLineEnd,_RT::TYPE_SIZE * (cbEnd - cbStart));
- cbEnd -= (pLineEnd - pStart);
- }
- else
- {
- memset(pStart,0, _RT::TYPE_SIZE * (cbEnd - cbStart));
- break;
- }
- }
- }
- if (pcbRemoved) *pcbRemoved = dwRemoved;
- return pBuffer;
- }
- _ReturnType operator()(_Type pBuffer, DWORD cbSize, _constType pWord, DWORD* pcbRemoved = NULL)
- {
- _constType pWords[] = {pWord,0};
- return operator()(pBuffer,cbSize,pWords,1,pcbRemoved);
- }
- };
- HRESULT test_file_filter(LPCTSTR lpszFile, LPCSTR *pWords, DWORD cbWords, bool fLine = false)
- {
- ATL::CAtlFileMapping map;
- ATL::CAtlFile file;
- HRESULT hr;
- ULONGLONG uSize = 0;
- if (FAILED(hr = file.Create(lpszFile,GENERIC_READ | GENERIC_WRITE,0,OPEN_EXISTING)))
- return hr;
- if (FAILED(hr = file.GetSize(uSize)))
- return hr;
- size_t nSize = (size_t)uSize;
- if (FAILED(hr = map.MapFile(file.m_h,nSize,0,PAGE_READWRITE,FILE_MAP_READ | FILE_MAP_WRITE)))
- return hr;
-
- LPSTR pData = (LPSTR)map.GetData();
- DWORD cbRemoved = 0;
- if (fLine)
- {
- // 区分大小写, 过滤包含指定单词的段落
- Str_Filter_Line<> filter;
- filter(pData,nSize,pWords,cbWords,&cbRemoved);
- }
- else
- {
- // 以不区分大小写的模式,过滤指定单词
- Str_Filter_Word filter;
- filter(pData,nSize,pWords,cbWords,&cbRemoved);
- }
- printf(pData);
- return S_OK;
- }
- int _tmain(int argc, _TCHAR* argv[])
- {
- // 过滤指定文件中多个单词
- LPCSTR pWords[] = {"Sec","Time","北国","sample","for",0};
- HRESULT hr = test_file_filter(_T("c:\\100.txt"),pWords,5);
- if (FAILED(hr))
- printf("failed! hr = 0x08X\n",hr);
- // 过滤文件中包含指定单词的段落
- hr = test_file_filter(_T("c:\\300.txt"),pWords,5,true);
- if (FAILED(hr))
- printf("failed! hr = 0x08X\n",hr);
- _getch();
- return 0;
- }
复制代码
|
|