sbt编译报错:value toDF is not a member of org.apache.spark.rdd.RDD

解决之前:

// "Before" snippet: this version FAILS to compile with
// "value toDF is not a member of org.apache.spark.rdd.RDD" (the error quoted at the top of this file).
object Demo {


// NOTE(review): DataTrans is declared *inside* main below; per the fix recorded later
// in this file, that placement is exactly what breaks the implicit .toDF conversion
// brought in by `import ss.implicits._`.
def main(args: Array[String]): Unit = {
    val ss = SparkSession.builder()
      .appName("bayes_category3_vector")
      .config("spark.executor.memory", "10g")
      .config("spark.driver.memory", "4g")
      .config("spark.cores.max", "10")
      .master("local")
      .getOrCreate()
    
    import ss.implicits._
    // Case class in method scope -- the cause of the compile error shown in this post.
    case class DataTrans(gid: String, cate3: String, words:String)
    // trainPath is assumed to be defined elsewhere (snippet placeholder) -- TODO confirm.
    val wordDF = ss.sparkContext.textFile(trainPath)
                  .map(x => x.split("\t"))
                  // keep rows whose second field is "1" and whose third field is non-empty
                  .filter(x => (x(1) == "1") && (x(2) != ""))
                  .map(x => (x(0), x(2), x(3)))
                  .map(x => DataTrans(x._1, x._2, x._3.replace("{]", " ")))
                  .toDF("gid", "cate", "sentence")   // <-- compile error is reported here
}

}

解决后代码示例:

// "After" snippet: compiles because DataTrans now lives at object level,
// where Spark can derive the implicit Encoder that .toDF requires.
object Demo {
    // Must stay outside main: a case class declared inside a method cannot get
    // the implicit Encoder needed by .toDF (presumably a TypeTag-availability
    // issue -- see the note at the end of this file).
    case class DataTrans(gid: String, cate3: String, words: String)

    def main(args: Array[String]): Unit = {
        val ss = SparkSession.builder()
          .appName("bayes_category3_vector")
          .config("spark.executor.memory", "10g")
          .config("spark.driver.memory", "4g")
          .config("spark.cores.max", "10")
          .master("local")
          .getOrCreate()

        import ss.implicits._
        // trainPath is assumed to be defined elsewhere (snippet placeholder) -- TODO confirm.
        // Builds a DataFrame(gid, cate, sentence) from a tab-separated training file.
        val wordDF = ss.sparkContext.textFile(trainPath)
                      .map(_.split("\t"))
                      // Robustness fix: check arity BEFORE indexing. The original
                      // indexed x(1)..x(3) unconditionally, so any malformed/short
                      // line would throw ArrayIndexOutOfBoundsException at runtime;
                      // such lines are now skipped instead.
                      .filter(x => x.length >= 4 && x(1) == "1" && x(2) != "")
                      .map(x => DataTrans(x(0), x(2), x(3).replace("{]", " ")))
                      .toDF("gid", "cate", "sentence")
    }
}

出错原因:case class 定义在方法体内时,Scala 无法为它生成稳定的 TypeTag,Spark 也就无法推导出 `.toDF` 所需的隐式 Encoder,`import ss.implicits._` 提供的 RDD→DataFrame 隐式转换因此不适用,编译器才报 "value toDF is not a member of RDD"。解决办法就是把 `case class DataTrans(gid: String, cate3: String, words: String)` 移到主函数外(对象级别,或顶层),编译即可通过。