java - 排序 - java大文件排序器

摘要:1个未分类的大文件,变成多个分类文件(即,12 ),第一次排序合并将它转换为6个文件,第二次排序合并将它转换为3个文件,第三次合并将它转换为2个文件,第四次合并将它再次转换为1个文件,代码:


package Assignment11;


import java.io.BufferedWriter;


import java.io.File;


import java.io.FileNotFoundException;


import java.io.FileWriter;


import java.io.IOException;


import java.util.ArrayList;


import java.util.Collections;


import java.util.Scanner;



public class FileSorter_1 


{


public static ArrayList<String> storyline = new ArrayList<String>();


public static int num_lines = 100000; //this number can be changed


public static int num_files_initial;


public static int num_files_sec;



public static void main(String[] args) throws IOException 


{


 phase1();


 phase2();



}


public static void phase1() throws IOException 


{


 Scanner story = new Scanner(new File("Aesop_Shakespeare_Shelley_Twain.txt")); //file name



 int f = 0;


 while(story.hasNext()) 


 {


 int i = 0;


 while(story.hasNext())


 {


 String temp = story.next(); 


 storyline.add(temp);


 i++;


 if(i > num_lines) 


 {


 break;


 }


 }


 Collections.sort(storyline, String.CASE_INSENSITIVE_ORDER);



 BufferedWriter write2file = new BufferedWriter(new FileWriter("temp_0_" + f +".txt")); //initialze new file


 for(int x = 0; x<num_lines;x++) 


 {


 write2file.write(storyline.get(x)); 


 write2file.newLine(); 


 }


 write2file.close();



 f++;


 }


 num_files_initial = f;



}



public static void phase2() throws IOException 


{


 int file_n = 1;


 int prev_fn = 0;


 int t = 0;


 int g = 0;


 while(g<5) 


 {


 System.out.println(num_files_initial);


 if(t+1 > num_files_initial-1)


 {


 if(num_files_initial % 2 != 0)


 {



 BufferedWriter w = new BufferedWriter(new FileWriter("temp_"+file_n +"_" + g +".txt"));


 Scanner file1 = new Scanner(new File("temp_"+prev_fn +"_" + t +".txt"));


 String word1 = file1.next();


 while(file1.hasNext())


 {



 w.write(word1);


 w.newLine(); 


 }


 g++;



 break;



 }



 num_files_initial = num_files_initial / 2 + num_files_initial % 2;


 g = 0;


 t = 0;


 file_n++;


 prev_fn++;



 }


 String s1="temp_"+file_n +"_" + g +".txt";


 String s2="temp_"+prev_fn +"_" + t +".txt";


 String s3="temp_"+prev_fn +"_" + (t+1) +".txt";


 System.out.println(s2);


 System.out.println(s3);


 BufferedWriter w = new BufferedWriter(new FileWriter(s1));


 Scanner file1 = new Scanner(new File(s2));


 Scanner file2 = new Scanner(new File(s3));



 String word1 = file1.next();


 String word2 = file2.next();



 System.out.println(num_files_initial);



 //System.out.println(t);


 //System.out.println(g);



 while(file1.hasNext() && file2.hasNext())


 {



 if(word1.compareTo(word2) == 1) //if word 1 comes first = 1


 {


 w.write(word1);


 w.newLine();


 file1.next();


 }


 if(word1.compareTo(word2) == 0) //if word 1 comes second = 0


 {


 w.write(word2);


 w.newLine();


 file2.next();


 }


 } 


 while(file1.hasNext()) 


 {



 w.write(word1);


 w.newLine();


 break;


 }


 while(file2.hasNext()) 


 {



 w.write(word2);


 w.newLine();


 break;


 }



 g++;



 t+=2;


 w.close();


 file1.close();


 file2.close();



 }



}


 }



时间:

将数据写入新文件后,你不会清除现有的排序数组,这就是它被复制到新文件的原因,以下是一些修复:


...


 int f = 0;


 while(story.hasNext()) 


 {


 // initilize the array here.


 storyline = new ArrayList<>();


 int i = 0;


 while(story.hasNext())


 {


 String temp = story.next(); 


 storyline.add(temp);


 i++;


 if(i > num_lines) 


 {


 break;


 }


 }


 Collections.sort(storyline, String.CASE_INSENSITIVE_ORDER);



 BufferedWriter write2file = new BufferedWriter(new FileWriter("temp_0_" + f +".txt")); //initialze new file



 // instead of num_lines use i


 for(int x = 0; x<i;x++) 


 {


 write2file.write(storyline.get(x)); 


 write2file.newLine(); 


 }


 write2file.close();



 f++;


 }


 num_files_initial = f;



希望这个能帮到你。

...