Created
May 25, 2015 18:10
-
-
Save Junzki/b12822085c011019c4b6 to your computer and use it in GitHub Desktop.
http://wwj718.github.io/regex-demo.html --> CSharp Version
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text.RegularExpressions;
namespace zh_clean_csharp
{
    /// <summary>
    /// Command-line tool that, for every file with a given extension in a directory,
    /// writes a "_Filtered" copy in which each line is cut off at its first Chinese
    /// character (CJK ideograph in U+4E00..U+9FA5).
    /// </summary>
    class Program
    {
        // Matches from the first ideograph in U+4E00..U+9FA5 to the end of the line.
        // ".*" (not ".+") so that a line ending in a single ideograph is trimmed too.
        private static readonly Regex ChineseTail = new Regex("[\\u4e00-\\u9fa5].*$");

        /// <summary>
        /// Entry point. Expects two arguments: the directory to scan and the file
        /// extension to process (for example <c>txt</c>).
        /// </summary>
        /// <param name="args">Command-line arguments: directory, extension.</param>
        static void Main(string[] args)
        {
            if (args == null || args.Length < 2)
            {
                Console.Error.WriteLine("Usage: zh_clean_csharp <directory> <extension>");
                Environment.ExitCode = 1;
                return;
            }

            string directory = args[0];
            string fileType = args[1];

            if (!Directory.Exists(directory))
            {
                Console.Error.WriteLine("Directory not found: " + directory);
                Environment.ExitCode = 1;
                return;
            }

            // The listing is taken once, before any output is written, so files
            // produced by this run are not picked up and filtered again.
            string[] candidates = FileFilter(Directory.GetFiles(directory), fileType);
            foreach (string file in candidates)
            {
                FilterAndOutput(file, fileType);
                Console.WriteLine(GetFileName(file, fileType) + "......OK");
            }

            Console.WriteLine("Press any key to continue...");
            Console.ReadKey(true);
        }

        /// <summary>
        /// Reads <paramref name="file"/> line by line, passes each line through
        /// <see cref="TextFilter"/>, and writes the result next to the input as
        /// <c>&lt;name&gt;_Filtered&lt;extension&gt;</c>.
        /// </summary>
        /// <param name="file">Path of the input file.</param>
        /// <param name="FILETYPE">Extension of the input file, with or without a leading dot.</param>
        /// <exception cref="ArgumentException"><paramref name="file"/> is null or empty.</exception>
        /// <exception cref="ArgumentNullException"><paramref name="FILETYPE"/> is null.</exception>
        public static void FilterAndOutput(string file, string FILETYPE)
        {
            if (string.IsNullOrEmpty(file))
            {
                throw new ArgumentException("A file path is required.", "file");
            }
            if (FILETYPE == null)
            {
                throw new ArgumentNullException("FILETYPE");
            }

            // Build the output path from its parts instead of string-replacing inside
            // the full path: a directory that happens to share the file's name must
            // not be renamed along with it.
            string fileName = Path.GetFileName(file);
            string baseName = GetFileName(file, FILETYPE);
            string extension = fileName.Substring(baseName.Length);
            string folder = Path.GetDirectoryName(file) ?? string.Empty;
            string outputPath = Path.Combine(folder, baseName + "_Filtered" + extension);

            // Open the input first so a missing or unreadable input does not leave an
            // empty output file behind.
            using (StreamReader reader = new StreamReader(file))
            using (StreamWriter writer = new StreamWriter(outputPath))
            {
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    writer.WriteLine(TextFilter(line));
                }
            }
        }

        /// <summary>
        /// Returns the entries of <paramref name="files"/> whose path ends with
        /// <c>.FILETYPE</c>, compared case-insensitively, preserving input order.
        /// </summary>
        /// <param name="files">Candidate file paths.</param>
        /// <param name="FILETYPE">Extension to keep, with or without a leading dot.</param>
        /// <returns>The matching paths; empty when nothing matches.</returns>
        /// <exception cref="ArgumentNullException">An argument is null.</exception>
        public static string[] FileFilter(string[] files, string FILETYPE)
        {
            if (files == null)
            {
                throw new ArgumentNullException("files");
            }
            if (FILETYPE == null)
            {
                throw new ArgumentNullException("FILETYPE");
            }

            string suffix = BuildSuffix(FILETYPE);

            // Plain suffix comparison rather than a regex built from user input, so
            // characters such as '+' or '.' in the extension are taken literally.
            // At least one character must precede the suffix.
            return files
                .Where(path => path.Length > suffix.Length
                               && path.EndsWith(suffix, StringComparison.OrdinalIgnoreCase))
                .ToArray();
        }

        /// <summary>
        /// Returns the file name of <paramref name="file"/> without its directory and
        /// without a trailing <c>.FILETYPE</c> (compared case-insensitively). When the
        /// name does not end with that extension it is returned unchanged.
        /// </summary>
        /// <param name="file">File path.</param>
        /// <param name="FILETYPE">Extension to strip, with or without a leading dot.</param>
        /// <returns>The bare file name.</returns>
        private static string GetFileName(string file, string FILETYPE)
        {
            string name = Path.GetFileName(file);
            string suffix = BuildSuffix(FILETYPE);

            if (name.EndsWith(suffix, StringComparison.OrdinalIgnoreCase))
            {
                // Cut only at the very end; an earlier ".ext" inside the name stays.
                return name.Substring(0, name.Length - suffix.Length);
            }
            return name;
        }

        /// <summary>
        /// Removes everything from the first Chinese character (U+4E00..U+9FA5) to the
        /// end of the line. Text without such a character is returned unchanged.
        /// </summary>
        /// <param name="origin_data">The line to clean.</param>
        /// <returns>The text preceding the first Chinese character.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="origin_data"/> is null.</exception>
        public static string TextFilter(string origin_data)
        {
            if (origin_data == null)
            {
                throw new ArgumentNullException("origin_data");
            }

            Match match = ChineseTail.Match(origin_data);
            if (!match.Success)
            {
                return origin_data;
            }

            // Use the match position directly rather than searching for the matched
            // text again, which is culture-sensitive and may find a different spot.
            return origin_data.Remove(match.Index, match.Length);
        }

        // Normalizes an extension argument ("txt" or ".txt") to the form ".txt".
        private static string BuildSuffix(string fileType)
        {
            return "." + fileType.TrimStart('.');
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment